In [30]:
import numpy as np 
import pandas as pd 
import sklearn.pipeline
from sklearn.kernel_approximation import RBFSampler
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import params 
import random
import matplotlib.pyplot as plt 

Dr Yue Jin's code

In [31]:
# Copy of the constants kept in params.py; the code below reads them as params.<name>.

# Range the raw workload trace is rescaled into.
workload_min = 10
workload_max = 30

# Smallest and largest number of VMs the environment may run.
number_VM_min = 2
number_VM_max = 11

# Linear response-time model: entry k of both lists is used when
# number_VM == number_VM_min + k, so there is one entry per allowed VM count.
response_time_intercept = [
    -338.8173500,
    -99.5766850,
    185.0698700,
    257.5648000,
    397.3988700,
    375.8291500,
    371.0434800,
    423.1175400,
    377.6604100,
    368.1021800,
]
response_time_coefficients = [
    310.9830100,
    196.6712300,
    113.6043900,
    91.8822210,
    66.1391060,
    62.3167310,
    58.4447950,
    52.3450650,
    53.0440920,
    52.8421350,
]

# Utility model: fixed revenue minus a per-VM cost (coefficient 0) and a penalty
# per unit of response time above the SLA threshold (coefficient 1).
response_time_SLA = 2000
utility_fixed_revenue = 30
utility_coefficients = [1.0, 200/2000]
In [32]:
# All the relevant code is included in this file. It comes from multiple files, so you will need to make the necessary adjustments, e.g. separating it back into multiple files or changing namespaces.

# function to calculate response time
def calculate_response_time(number_VM, workload):
    """Evaluate the linear response-time model for a given VM count.

    The model row is selected by ``number_VM - params.number_VM_min``; the
    result is ``intercept[row] + coefficient[row] * workload``.
    """
    row = number_VM - params.number_VM_min
    offset = params.response_time_intercept[row]
    slope = params.response_time_coefficients[row]
    return offset + slope * workload

# Read the raw workload trace and rescale it linearly into
# [params.workload_min, params.workload_max].
# order='F' is what the legacy ``flatten(1)`` call resolved to (any truthy
# non-string order meant Fortran order); the integer form triggered a
# DeprecationWarning and is rejected by current NumPy releases.
raw_workload_list = np.genfromtxt('milano_callin_r.txt', skip_header = 1).flatten('F')[:8502]
raw_workload_max = raw_workload_list.max()
raw_workload_min = raw_workload_list.min()
workload_scaling = (params.workload_max - params.workload_min)/(raw_workload_max - raw_workload_min)
workload_list = (raw_workload_list - raw_workload_min) * workload_scaling + params.workload_min
# Derive the length from the data so it stays correct if the file holds fewer
# than 8502 samples.
workload_length = len(workload_list)

# function to calculate rewards
def utility_linear_cost(metrics):
    """Return the utility for one observation.

    ``metrics`` is a mapping with the keys "number_VM" and "response_time".
    The utility is the fixed revenue minus a cost linear in the number of VMs
    and a penalty linear in the response time in excess of the SLA threshold
    (no penalty when the response time is at or below the threshold).
    """
    vm_cost = params.utility_coefficients[0] * metrics["number_VM"]
    sla_excess = max(metrics["response_time"] - params.response_time_SLA, 0)
    sla_penalty = params.utility_coefficients[1] * sla_excess
    return params.utility_fixed_revenue - (vm_cost + sla_penalty)
    
/home/siddhant/python/anaconda3/lib/python3.6/site-packages/ipykernel_launcher.py:11: DeprecationWarning: Non-string object detected for the array ordering. Please pass in 'C', 'F', 'A', or 'K' instead
  # This is added back by InteractiveShellApp.init_path()

Build Environment

In [33]:
class cloud:
    """Simulated auto-scaling environment used by the Q-learning agent.

    A state is the pair ``(workload, number_of_VMs)``.  An action is an index
    into ``self.actions``; the value stored there is the requested change in
    the number of VMs.  Requests that would leave the range
    ``[params.number_VM_min, params.number_VM_max]`` are clamped.
    """

    def __init__(self,initial_wl,initial_vm):
        """Store the initial state and announce it on stdout.

        initial_wl -- workload of the initial state
        initial_vm -- number of VMs of the initial state
        """
        #init states
        self.num_vm = initial_vm
        self.wl = initial_wl
        self.initial_state = (initial_wl,initial_vm)
        self.rew = 0

        #init action space: entry k is the VM delta requested by action index k
        self.actions = [-2,-1,0,1,2] #-2 means scaling down number of vms by 2.

        print("Current state : " , self.initial_state)
        print("Possible actions: " , self.actions)
        print("Current number of VMs: " , self.num_vm)

    def reset_env(self):
        """Restore the initial workload / VM count and return the initial state."""
        self.wl,self.num_vm = self.initial_state
        return self.initial_state

    def action_space(self,action):
        """Apply ``action`` (an index into ``self.actions``) to the VM count.

        Returns ``(number_of_VMs_after, effective_action_index)``.  When the
        request had to be clamped, the returned index is the action that was
        effectively carried out: the "do nothing" index if the VM count did
        not change, otherwise the index of the single-step action in the
        requested direction.  An index outside the action list leaves the
        environment untouched and is returned unchanged.
        """
        if not 0 <= action < len(self.actions):
            return self.num_vm,action

        delta = self.actions[int(action)]
        if delta == 0:
            return self.num_vm,action

        noop_index = self.actions.index(0)
        projected_num_vm = self.num_vm + delta

        if delta < 0 and projected_num_vm < params.number_VM_min:
            # Scale down only as far as the lower bound allows.
            applied = self.num_vm - params.number_VM_min
            print('Action {:+d} not possible so Scaled down by'.format(delta),applied)
            self.num_vm = self.num_vm - applied
            action = noop_index if applied == 0 else self.actions.index(-1)
        elif delta > 0 and projected_num_vm > params.number_VM_max:
            # Scale up only as far as the upper bound allows.
            applied = params.number_VM_max - self.num_vm
            print('Action {:+d} not possible so Scaled up by'.format(delta),applied)
            self.num_vm = self.num_vm + applied
            action = noop_index if applied == 0 else self.actions.index(1)
        else:
            self.num_vm = projected_num_vm

        return self.num_vm,action

    def reward(self,curr_wl_state,curr_num_vm):
        """Utility of running ``curr_num_vm`` VMs under workload ``curr_wl_state``.

        Fixed revenue minus a cost linear in the VM count and a penalty linear
        in the response time above ``params.response_time_SLA``.
        """
        response_time = calculate_response_time(curr_num_vm,curr_wl_state)
        sla_excess = max(response_time - params.response_time_SLA, 0)
        reward = params.utility_fixed_revenue - (params.utility_coefficients[0] * curr_num_vm
                + params.utility_coefficients[1] * sla_excess)

        return reward

    def step(self,action,curr_wl_state,curr_num_vm):
        """Take ``action`` and advance to the next workload sample.

        Returns ``(state, effective_action, reward, next_response_time,
        next_state)`` where both states are ``(workload, number_of_VMs)``
        tuples.  The reward and the returned response time are evaluated on
        the next state; the response time printed in the log line is the one
        of the current state.
        """
        curr_RT = calculate_response_time(curr_num_vm,curr_wl_state)

        #apply the (possibly clamped) action
        next_num_vm,act = self.action_space(action)

        # The successor is found by value: first occurrence of the current
        # workload in workload_list, plus one, wrapping at the end.
        # NOTE(review): if the trace contains repeated values this jumps back
        # to the first occurrence - confirm this is acceptable.
        curr_ind = np.where(workload_list == curr_wl_state)[0][0]
        next_ind = (curr_ind + 1) % len(workload_list)
        next_wl = workload_list[next_ind]

        nxt_RT = calculate_response_time(next_num_vm,next_wl)

        self.rew = self.reward(next_wl,next_num_vm)
        print("Current State,action,reward,Response time,Next State: " , (curr_num_vm,curr_wl_state),action,self.rew,curr_RT
              ,(next_wl,next_num_vm))

        #return state,action,reward,next response time,next state
        return (curr_wl_state,curr_num_vm),act,self.rew,nxt_RT,(next_wl,next_num_vm)
        
        
In [34]:
## Feature construction for the workload component of the state.
## The scalar workload is expanded into a higher-dimensional vector before it
## is fed to the neural network.

# Union of four RBFSampler transforms (RBF-kernel approximations) with
# different gamma values; 4 x 100 components = 400 features per workload value.
featurizer = sklearn.pipeline.FeatureUnion(
    [
        ("rbf%d" % position, RBFSampler(gamma=gamma_value, n_components=100))
        for position, gamma_value in enumerate((5.0, 2.0, 1.0, 0.5), start=1)
    ]
)
# Fit on 1000 workload values drawn at random (single-feature column vector).
featurizer.fit(np.random.choice(workload_list,1000).reshape(-1,1))
Out[34]:
FeatureUnion(n_jobs=1,
       transformer_list=[('rbf1', RBFSampler(gamma=5.0, n_components=100, random_state=None)), ('rbf2', RBFSampler(gamma=2.0, n_components=100, random_state=None)), ('rbf3', RBFSampler(gamma=1.0, n_components=100, random_state=None)), ('rbf4', RBFSampler(gamma=0.5, n_components=100, random_state=None))],
       transformer_weights=None)

BUILD NEURAL NETWORK TO ESTIMATE Q VALUES

In [35]:
class NNet(nn.Module):
    """Single-hidden-layer network that outputs one Q-value per action.

    The default input size of 401 matches the 400 features produced by the
    module-level ``featurizer`` for the workload (4 samplers x 100 components)
    plus one entry for the number of VMs.
    """

    def __init__(self,in_dimension = 401, num_hidden_nodes = 20,action_space = 5):
        """Create the two linear layers.

        in_dimension     -- size of the input vector
        num_hidden_nodes -- width of the hidden layer
        action_space     -- number of outputs (one Q-value per action)
        """
        super(NNet,self).__init__()
        self.h1 = nn.Linear(in_dimension,num_hidden_nodes)
        self.out = nn.Linear(num_hidden_nodes,action_space)

    def forward(self,x):
        """Return the Q-values for input ``x`` (tanh hidden activation, linear output)."""
        # torch.tanh replaces the deprecated F.tanh
        return self.out(torch.tanh(self.h1(x)))

    def fit(self,x_tr,y_tr,learning_rate = 0.004,iterations = 5000):
        """Fit the network to ``y_tr`` by minimising the MSE with Adam.

        Returns ``(res, pred, loss_list)``: the outputs and arg-max
        predictions computed in the last iteration (before that iteration's
        weight update) and the list of per-iteration losses as detached
        tensors.  ``res`` and ``pred`` are ``None`` when ``iterations`` is 0.
        """
        # Optimise this network's own parameters (the original referenced an
        # undefined name here).
        opt = optim.Adam(params=self.parameters(), lr= learning_rate)
        loss_list = []
        res = pred = None
        for _iteration in range(iterations):
            res = self(x_tr) #out values
            _,pred = res.max(1) #prediction
            loss = F.mse_loss(res,y_tr) #calculate loss between output and target value
            loss_list.append(loss.detach()) #detach so the history does not keep the graph
            print('loss' , loss.item())
            opt.zero_grad()
            loss.backward()
            opt.step()

        return res,pred,loss_list

    def predict(self,state):
        """Return the index of the action with the largest Q-value for ``state``.

        ``state`` is a ``(workload, number_of_VMs)`` pair.  The workload is
        expanded with ``state_to_highdim`` and the VM count is appended as the
        last input.  Intended for use inside the epsilon-greedy policy; no
        gradients are recorded.
        """
        w,n = state
        features = self.state_to_highdim(w).reshape(-1)
        net_input = torch.cat((features, torch.tensor([float(n)])))
        with torch.no_grad():
            q_values = self(net_input)
        return q_values.reshape(1,-1).max(1)[1].item()

    def state_to_highdim(self,val):
        """Map workload value(s) to the feature space of the global ``featurizer``.

        Accepts a scalar or any array-like of workload values; the input is
        reshaped to a single-feature column because the featurizer was fitted
        on that layout.  Returns a float tensor with one row per value.
        """
        samples = np.asarray(val, dtype=float).reshape(-1,1)
        return torch.tensor(featurizer.transform(samples)).float()

    def print_model_params(self):
        """Print the name and data of every trainable parameter."""
        for name, param in self.named_parameters():
            if param.requires_grad:
                print (name, param.data)
In [37]:
learning_rate = 0.001
loss_list = []   # one detached loss tensor per optimisation step

# The Adam optimiser is created on first use rather than at cell-execution
# time, so this cell no longer requires current_net to exist already.
opt = None
_opt_net = None  # network whose parameters ``opt`` was built for


def _get_opt():
    """Return the Adam optimiser for ``current_net``, (re)building it if the network object changed."""
    global opt, _opt_net
    if opt is None or _opt_net is not current_net:
        opt = optim.Adam(params=current_net.parameters(), lr= learning_rate)
        _opt_net = current_net
    return opt


def optimizer():
    """Run one Q-learning update of ``current_net`` on a replay batch.

    Samples a batch from ``mem_obj``; returns immediately (None) when no batch
    is available yet.  Each transition is ``[state, action, reward,
    next_state]`` with states given as ``(workload, number_of_VMs)``.  The
    regression target is ``reward + GAMMA * max_a Q_target(next_state, a)``
    with the maximum taken from ``target_net``.  The loss is appended to
    ``loss_list`` and printed.
    """
    batch_dct = mem_obj.sample_batch()
    if batch_dct is None:
        return

    transitions = [batch_dct[i] for i in range(len(batch_dct))]

    states_wl = np.array([t[0][0] for t in transitions], dtype=float).reshape(-1,1)
    states_numvm = torch.tensor([[float(t[0][1])] for t in transitions])
    next_states_wl = np.array([t[3][0] for t in transitions], dtype=float).reshape(-1,1)
    next_states_numvm = torch.tensor([[float(t[3][1])] for t in transitions])
    # Explicit dtypes: gather needs int64 indices, and rewards must match the
    # float32 network outputs.
    action_indices = torch.tensor([[int(t[1])] for t in transitions], dtype=torch.long)
    rewards = torch.tensor([float(t[2]) for t in transitions], dtype=torch.float32)

    #network inputs: featurised workload with the VM count as last column
    tn_vec = torch.cat((current_net.state_to_highdim(states_wl), states_numvm), dim=1)
    tn_vec2 = torch.cat((current_net.state_to_highdim(next_states_wl), next_states_numvm), dim=1)

    #Q-values of the actions actually taken, shape (batch,)
    # squeeze(1) is required: leaving this as (batch, 1) makes mse_loss
    # broadcast against the (batch,) targets into a (batch, batch) matrix.
    q_estimated = current_net(tn_vec).gather(1,action_indices).squeeze(1)

    #max Q-value of the next state from the target network, no gradient
    with torch.no_grad():
        batch_target_predictions = target_net(tn_vec2).max(1)[0]

    q_targets = rewards + GAMMA * batch_target_predictions

    loss = F.mse_loss(q_estimated,q_targets)
    loss_list.append(loss.detach())
    print('loss' , loss.item())

    adam = _get_opt()
    adam.zero_grad()
    loss.backward()
    adam.step()
    

EXPERIENCE REPLAY

In [38]:
class memory:
    """Fixed-size experience-replay buffer with uniform random sampling.

    Transitions are stored in ``mem_dct`` under the slots ``0 .. mem_size-1``.
    Once every slot has been written, new transitions overwrite the oldest
    one and ``sample_batch`` starts returning batches.
    """

    def __init__(self,batch_size = 100,mem_size = 1000):
        """Create an empty buffer.

        batch_size -- number of transitions returned by ``sample_batch``
        mem_size   -- number of slots in the buffer (must be at least 1)
        """
        if mem_size < 1:
            raise ValueError("mem_size must be at least 1")

        self.mem_size = mem_size
        self.batch_size = batch_size
        self.mem_dct = {slot: [] for slot in range(mem_size)}

        self.counter = 0    # slot the next transition is written to
        self.mem_flag = 0   # becomes 1 once every slot has been filled

    def update_memory(self,input_list):     #state,action,reward,next state
        """Store one transition, overwriting the oldest entry when the buffer is full."""
        self.mem_dct[self.counter] = input_list
        # Always advance the write position; the original left it at slot 0
        # after wrapping, so the transition stored at wrap time was overwritten
        # by the very next insert.
        self.counter = (self.counter + 1) % self.mem_size
        if self.counter == 0:
            self.mem_flag = 1              # every slot now holds a transition

    def sample_batch(self):
        """Return ``batch_size`` distinct stored transitions, or None until the buffer is full.

        The batch is a dict keyed ``0 .. batch_size-1``.
        """
        if self.mem_flag != 1:
            return None

        rand_ind = np.random.choice(self.mem_size,self.batch_size,replace=False)
        return {cnt: self.mem_dct[key] for cnt, key in enumerate(rand_ind)}
        
In [454]:
# Small replay buffer (batches of 10 out of 20 slots). Note that the
# object-declaration cell further down rebinds mem_obj to memory() with the
# default sizes.
mem_obj = memory(batch_size=10, mem_size=20)

Q learning loop

In [39]:
# epsilon-greedy policy  
def eps_greedy(epsilon,state):
    """Epsilon-greedy action selection.

    Draws a uniform number in [0, 1]; when it exceeds ``epsilon`` the action
    predicted by ``current_net`` for ``state`` is returned, otherwise a
    uniformly random action index.
    Action indices: 0 -> -2, 1 -> -1, 2 -> 0, 3 -> +1, 4 -> +2 VMs.
    """
    draw = random.uniform(0,1)

    if draw > epsilon:
        # exploit: greedy action from the current Q-network
        return current_net.predict(state)

    # explore: uniformly random action index
    return np.random.choice([0,1,2,3,4])
In [40]:
### DECLARE ALL CLASS OBJECTS ######

#Neural network objects
current_net = NNet()
target_net = NNet()
# Start the target network as an exact copy of the current network; otherwise
# the bootstrap targets come from an unrelated random network until the first
# periodic synchronisation in the training loop.
target_net.load_state_dict(current_net.state_dict())

#memory object
mem_obj = memory()

#environment object: initial state is (workload_list[15], 10 VMs)
cloud_obj = cloud(workload_list[15],10)

#q learning variables
#hyper parameters
total_episodes = 1000        # Total episodes
#learning_rate = 0.8           # Learning rate
max_steps = 100                # Max steps per episode
GAMMA = 0.5                  # Discounting rate

# Exploration parameters
epsilon = 1.0                 # Exploration rate
max_epsilon = 1.0             # Exploration probability at start
min_epsilon = 0.01            # Minimum exploration probability
decay_rate = 0.01             # Exponential decay rate for exploration prob

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
Current state :  (10.58735855349979, 10)
Possible actions:  [-2, -1, 0, 1, 2]
Current number of VMs:  10
In [41]:
# Q-learning training loop.
# Per-episode records
episodic_reward = [] #total reward collected in each episode
all_steps = []       #index of the last step executed in each episode
# Per-step records (across all episodes)
curr_state_list = []
reward_list = []
action_list = []
RT_list = []

# range end is total_episodes + 1 so that exactly total_episodes episodes run
# (episode numbers start at 1).
for i in range(1,total_episodes + 1):
    #at beginning of every episode reset the environment
    state = cloud_obj.reset_env()
    total_rewards = 0
    count = 0   #position in workload_list; every episode replays the trace from its start

    print("############ Running episode number: " + str(i) +"  ##############")
    for step in range(max_steps):

        _,curr_num_vm = state

        #get the current workload (wrap around at the end of the trace)
        curr_wl = workload_list[count % workload_length]
        count = count + 1

        # Choose the action for the state the environment is actually stepped
        # from; previously the first action of an episode was chosen for the
        # reset state's workload while the step used workload_list[0].
        action = eps_greedy(epsilon,(curr_wl,curr_num_vm))

        # now take a step in the environment
        state,action,rew,RT,next_state = cloud_obj.step(action,curr_wl,curr_num_vm)

        #store transition
        mem_obj.update_memory([state,action,rew,next_state])
        ##optimize model
        optimizer()

        total_rewards = total_rewards + rew

        curr_state_list.append(state)
        reward_list.append(rew) #reward at every time step
        action_list.append(action)
        RT_list.append(RT)
        state = next_state

    #update target net every 10 episodes
    if i % 10 == 0:
        target_net.load_state_dict(current_net.state_dict())

    # Exponential decay from max_epsilon towards the min_epsilon floor (the
    # original formula ignored both bounds).
    epsilon = min_epsilon + (max_epsilon - min_epsilon)*np.exp(-decay_rate*i)
    episodic_reward.append(total_rewards)
    all_steps.append(step)
############ Running episode number: 1  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 4 19.0 1000.80824137 (11.786394321941378, 11)
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 0 21.0 982.049698353 (11.469111876584304, 9)
Current State,action,reward,Response time,Next State:  (9, 11.469111876584304) 4 19.0 1023.46894667 (11.336751742492702, 11)
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 2 19.0 967.160346038 (11.25610796929319, 11)
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 0 21.0 949.137050055 (10.931193889570471, 9)
Current State,action,reward,Response time,Next State:  (9, 10.931193889570471) 3 20.0 995.311594677 (10.816918347608043, 10)
Current State,action,reward,Response time,Next State:  (10, 10.816918347608043) 0 22.0 951.434021987 (10.819208572963639, 8)
Current State,action,reward,Response time,Next State:  (8, 10.819208572963639) 2 22.0 1003.36990711 (10.768325938188134, 8)
Current State,action,reward,Response time,Next State:  (8, 10.768325938188134) 2 22.0 1000.39608195 (10.772009508959538, 8)
Current State,action,reward,Response time,Next State:  (8, 10.772009508959538) 4 20.0 1000.61136749 (10.644925616761762, 10)
Action +2 not possible so Scaled up by 1
Current State,action,reward,Response time,Next State:  (10, 10.644925616761762) 4 19.0 942.310823749 (10.58735855349979, 11)
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 0 21.0 927.560809977 (10.552868829802469, 9)
Current State,action,reward,Response time,Next State:  (9, 10.552868829802469) 2 21.0 975.508144832 (10.553846649940214, 9)
Current State,action,reward,Response time,Next State:  (9, 10.553846649940214) 1 22.0 975.559328891 (10.489125480251131, 8)
Current State,action,reward,Response time,Next State:  (8, 10.489125480251131) 2 22.0 984.078268423 (10.448897752470936, 8)
Current State,action,reward,Response time,Next State:  (8, 10.448897752470936) 2 22.0 981.727167119 (10.433149880183072, 8)
Current State,action,reward,Response time,Next State:  (8, 10.433149880183072) 4 20.0 980.806785952 (10.44185150623065, 10)
Current State,action,reward,Response time,Next State:  (10, 10.44185150623065) 1 21.0 931.538941947 (10.370942817486826, 9)
Current State,action,reward,Response time,Next State:  (9, 10.370942817486826) 1 22.0 965.985215893 (10.42733414151318, 8)
Current State,action,reward,Response time,Next State:  (8, 10.42733414151318) 4 20.0 980.466886297 (10.388469398680568, 10)
Action +2 not possible so Scaled up by 1
Current State,action,reward,Response time,Next State:  (10, 10.388469398680568) 4 19.0 928.707336523 (10.344006106602812, 11)
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 1 20.0 913.381595845 (10.30224719189987, 10)
Current State,action,reward,Response time,Next State:  (10, 10.30224719189987) 1 21.0 924.133757854 (10.278181486298042, 9)
Current State,action,reward,Response time,Next State:  (9, 10.278181486298042) 3 20.0 961.129617982 (10.268274366284802, 10)
Action +2 not possible so Scaled up by 1
Current State,action,reward,Response time,Next State:  (10, 10.268274366284802) 4 19.0 922.331700166 (10.335411397720526, 11)
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 4 19.0 912.67468196 (10.24826025489064, 11)
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 4 19.0 911.133954163 (10.236991269871366, 11)
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 2 19.0 909.046654676 (10.236272697871373, 11)
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 1 20.0 909.008683798 (10.369891240151098, 10)
Current State,action,reward,Response time,Next State:  (10, 10.369891240151098) 0 22.0 927.721874973 (10.316955310454549, 8)
Current State,action,reward,Response time,Next State:  (8, 10.316955310454549) 4 20.0 974.015818144 (10.333617326102203, 10)
Current State,action,reward,Response time,Next State:  (10, 10.333617326102203) 2 20.0 925.797758139 (10.390165524255663, 10)
Current State,action,reward,Response time,Next State:  (10, 10.390165524255663) 0 22.0 928.797305964 (10.425974763084863, 8)
Current State,action,reward,Response time,Next State:  (8, 10.425974763084863) 1 23.0 980.387437704 (10.546025383098053, 7)
Current State,action,reward,Response time,Next State:  (7, 10.546025383098053) 2 23.0 1033.02297692 (10.655373370049301, 7)
Current State,action,reward,Response time,Next State:  (7, 10.655373370049301) 2 23.0 1039.83718601 (10.624473674922116, 7)
Current State,action,reward,Response time,Next State:  (7, 10.624473674922116) 3 22.0 1037.91161802 (10.771376986314287, 8)
Current State,action,reward,Response time,Next State:  (8, 10.771376986314287) 2 22.0 1000.57439983 (10.924797168745895, 8)
Current State,action,reward,Response time,Next State:  (8, 10.924797168745895) 0 24.0 1009.54101094 (11.039747673816453, 6)
Current State,action,reward,Response time,Next State:  (6, 11.039747673816453) 4 22.0 1127.55791161 (11.271571944085663, 8)
Current State,action,reward,Response time,Next State:  (8, 11.271571944085663) 4 20.0 1029.8081916 (11.670334358779868, 10)
Current State,action,reward,Response time,Next State:  (10, 11.670334358779868) 3 19.0 996.702699398 (11.819721938468785, 11)
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 1 20.0 992.681522335 (12.19918626616789, 10)
Current State,action,reward,Response time,Next State:  (10, 12.19918626616789) 0 22.0 1024.75516863 (12.501496275411796, 8)
Current State,action,reward,Response time,Next State:  (8, 12.501496275411796) 2 22.0 1101.69086701 (13.168618569876575, 8)
Current State,action,reward,Response time,Next State:  (8, 13.168618569876575) 0 24.0 1140.68069275 (13.649658108197247, 6)
Current State,action,reward,Response time,Next State:  (6, 13.649658108197247) 4 22.0 1300.17505448 (14.283719188889453, 8)
Current State,action,reward,Response time,Next State:  (8, 14.283719188889453) 2 22.0 1205.85251983 (14.677479537099185, 8)
Current State,action,reward,Response time,Next State:  (8, 14.677479537099185) 3 21.0 1228.86576266 (15.353965082180355, 9)
Current State,action,reward,Response time,Next State:  (9, 15.353965082180355) 4 19.0 1226.82184023 (15.836943704090487, 11)
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 1 20.0 1204.9600972 (16.466876895473597, 10)
Current State,action,reward,Response time,Next State:  (10, 16.466876895473597) 2 20.0 1251.130943 (16.871606159345866, 10)
Current State,action,reward,Response time,Next State:  (10, 16.871606159345866) 1 21.0 1272.5994393 (17.534967586021782, 9)
Current State,action,reward,Response time,Next State:  (9, 17.534967586021782) 1 22.0 1340.98655806 (17.669285735563751, 8)
Current State,action,reward,Response time,Next State:  (8, 17.669285735563751) 1 23.0 1403.72126261 (17.944480812078613, 7)
Current State,action,reward,Response time,Next State:  (7, 17.944480812078613) 3 22.0 1494.0705337 (18.385807405229915, 8)
Current State,action,reward,Response time,Next State:  (8, 18.385807405229915) 4 20.0 1445.59822471 (18.671267839956315, 10)
Current State,action,reward,Response time,Next State:  (10, 18.671267839956315) 0 22.0 1368.06085906 (19.02839494033929, 8)
Current State,action,reward,Response time,Next State:  (8, 19.02839494033929) 4 20.0 1483.15412147 (19.286321916040979, 10)
Current State,action,reward,Response time,Next State:  (10, 19.286321916040979) 3 19.0 1400.68584406 (19.340464848017284, 11)
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 4 19.0 1390.09363446 (19.213467265587269, 11)
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 4 19.0 1383.38281107 (19.140765783401285, 11)
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 0 21.0 1379.54110953 (19.385636054792762, 9)
Current State,action,reward,Response time,Next State:  (9, 19.385636054792762) 1 22.0 1437.85991935 (19.223969507401588, 8)
Current State,action,reward,Response time,Next State:  (8, 19.223969507401588) 0 24.0 1494.58443695 (19.25591252280865, 6)
Current State,action,reward,Response time,Next State:  (6, 19.25591252280865) 3 23.0 1670.96770947 (19.08360399753829, 7)
Current State,action,reward,Response time,Next State:  (7, 19.08360399753829) 1 24.0 1565.05696683 (18.668181536495972, 6)
Current State,action,reward,Response time,Next State:  (6, 18.668181536495972) 1 25.0 1632.09570747 (18.375894992990247, 5)
Current State,action,reward,Response time,Next State:  (5, 18.375894992990247) 2 25.0 1945.98284482 (17.82724819986867, 5)
Current State,action,reward,Response time,Next State:  (5, 17.82724819986867) 1 11.75512286 1895.57195892 (17.229782241685768, 4)
Current State,action,reward,Response time,Next State:  (4, 17.229782241685768) 3 25.0 2142.4487714 (16.84211602880065, 5)
Current State,action,reward,Response time,Next State:  (5, 16.84211602880065) 0 -82.3792672693 1805.05582707 (16.237094554670044, 3)
Current State,action,reward,Response time,Next State:  (3, 16.237094554670044) 1 -234.157767166 3093.79267269 (15.950694610794756, 2)
Current State,action,reward,Response time,Next State:  (2, 15.950694610794756) 2 -230.364071496 4621.57767166 (15.828704162850809, 2)
Current State,action,reward,Response time,Next State:  (2, 15.828704162850809) 2 -221.722754431 4583.64071496 (15.550833128512703, 2)
Action -2 not possible so Scaled down by 0
Current State,action,reward,Response time,Next State:  (2, 15.550833128512703) 0 -218.484233892 4497.22754431 (15.446694946204717, 2)
Current State,action,reward,Response time,Next State:  (2, 15.446694946204717) 4 26.0 4464.84233892 (15.750501603468638, 4)
Current State,action,reward,Response time,Next State:  (4, 15.750501603468638) 3 25.0 1974.39599686 (15.817158911312735, 5)
Current State,action,reward,Response time,Next State:  (5, 15.817158911312735) 3 24.0 1710.88049068 (15.829956988360925, 6)
Current State,action,reward,Response time,Next State:  (6, 15.829956988360925) 4 22.0 1444.37807323 (15.892373986997768, 8)
Current State,action,reward,Response time,Next State:  (8, 15.892373986997768) 1 23.0 1299.87001973 (15.954793861767499, 7)
Current State,action,reward,Response time,Next State:  (7, 15.954793861767499) 3 22.0 1370.07974724 (16.004586266677634, 8)
Current State,action,reward,Response time,Next State:  (8, 16.004586266677634) 4 20.0 1306.42824342 (16.017694914042416, 10)
Action +2 not possible so Scaled up by 1
Current State,action,reward,Response time,Next State:  (10, 16.017694914042416) 4 19.0 1227.30449265 (15.947547279389703, 11)
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 1 20.0 1210.80462626 (16.11465619633363, 10)
Action +2 not possible so Scaled up by 1
Current State,action,reward,Response time,Next State:  (10, 16.11465619633363) 4 19.0 1232.44771583 (16.147078378791146, 11)
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 0 21.0 1221.34827555 (16.229253414601111, 9)
Current State,action,reward,Response time,Next State:  (9, 16.229253414601111) 4 19.0 1272.63886489 (16.295120821876548, 11)
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 4 19.0 1229.17115431 (16.667936385136993, 11)
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 4 19.0 1257.77263112 (16.845818065953559, 11)
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 1 20.0 1258.27117243 (17.052961248403161, 10)
Current State,action,reward,Response time,Next State:  (10, 17.052961248403161) 1 21.0 1282.21925533 (17.215992726625572, 9)
############ Running episode number: 2  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 2 19.0 1000.80824137 (11.786394321941378, 11)
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 2 19.0 990.920419923 (11.61852219546234, 11)
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 0 21.0 974.154538113 (11.336751742492702, 9)
Current State,action,reward,Response time,Next State:  (9, 11.336751742492702) 2 21.0 1016.54054685 (11.25610796929319, 9)
Current State,action,reward,Response time,Next State:  (9, 11.25610796929319) 2 21.0 1012.3192433 (11.027107764209074, 9)
Current State,action,reward,Response time,Next State:  (9, 11.027107764209074) 2 21.0 1000.33221268 (10.995673623987257, 9)
Current State,action,reward,Response time,Next State:  (9, 10.995673623987257) 0 23.0 998.686790566 (10.931193889570471, 7)
Current State,action,reward,Response time,Next State:  (7, 10.931193889570471) 1 24.0 1057.02541913 (10.816918347608043, 6)
Current State,action,reward,Response time,Next State:  (6, 10.816918347608043) 3 23.0 1112.82017919 (10.819208572963639, 7)
Current State,action,reward,Response time,Next State:  (7, 10.819208572963639) 2 23.0 1050.04686027 (10.768325938188134, 7)
Current State,action,reward,Response time,Next State:  (7, 10.768325938188134) 2 23.0 1046.87602081 (10.772009508959538, 7)
Current State,action,reward,Response time,Next State:  (7, 10.772009508959538) 1 24.0 1047.1055689 (10.644925616761762, 6)
Current State,action,reward,Response time,Next State:  (6, 10.644925616761762) 0 26.0 1101.44473373 (10.58735855349979, 4)
Current State,action,reward,Response time,Next State:  (4, 10.58735855349979) 2 26.0 1387.84028018 (10.552868829802469, 4)
Current State,action,reward,Response time,Next State:  (4, 10.552868829802469) 0 -66.3249648277 1383.92209616 (10.553846649940214, 2)
Action -2 not possible so Scaled down by 0
Current State,action,reward,Response time,Next State:  (2, 10.553846649940214) 0 -64.3122464116 2943.24964828 (10.489125480251131, 2)
Current State,action,reward,Response time,Next State:  (2, 10.489125480251131) 2 -63.0612324246 2923.12246412 (10.448897752470936, 2)
Action -2 not possible so Scaled down by 0
Current State,action,reward,Response time,Next State:  (2, 10.448897752470936) 0 -62.571500352 2910.61232425 (10.433149880183072, 2)
Current State,action,reward,Response time,Next State:  (2, 10.433149880183072) 2 -62.8421061381 2905.71500352 (10.44185150623065, 2)
Action -2 not possible so Scaled down by 0
Current State,action,reward,Response time,Next State:  (2, 10.44185150623065) 1 -60.636966392 2908.42106138 (10.370942817486826, 2)
Current State,action,reward,Response time,Next State:  (2, 10.370942817486826) 2 -62.3906407604 2886.36966392 (10.42733414151318, 2)
Current State,action,reward,Response time,Next State:  (2, 10.42733414151318) 4 26.0 2903.9064076 (10.388469398680568, 4)
Current State,action,reward,Response time,Next State:  (4, 10.388469398680568) 0 -59.799280449 1365.24559907 (10.344006106602812, 2)
Action -2 not possible so Scaled down by 0
Current State,action,reward,Response time,Next State:  (2, 10.344006106602812) 0 -59.0224715211 2877.99280449 (10.319026962956018, 2)
Action -2 not possible so Scaled down by 0
Current State,action,reward,Response time,Next State:  (2, 10.319026962956018) 1 -58.5006491501 2870.22471521 (10.30224719189987, 2)
Current State,action,reward,Response time,Next State:  (2, 10.30224719189987) 4 26.0 2865.0064915 (10.278181486298042, 4)
Current State,action,reward,Response time,Next State:  (4, 10.278181486298042) 2 26.0 1352.71640806 (10.268274366284802, 4)
Current State,action,reward,Response time,Next State:  (4, 10.268274366284802) 3 25.0 1351.59091573 (10.335411397720526, 5)
Current State,action,reward,Response time,Next State:  (5, 10.335411397720526) 4 23.0 1207.20535417 (10.305649118067803, 7)
Current State,action,reward,Response time,Next State:  (7, 10.305649118067803) 0 25.0 1018.04351387 (10.24826025489064, 5)
Current State,action,reward,Response time,Next State:  (5, 10.24826025489064) 0 27.0 1199.19771361 (10.276491935146446, 3)
Action -2 not possible so Scaled down by 1
Current State,action,reward,Response time,Next State:  (3, 10.276491935146446) 0 -56.4713008448 1921.51362397 (10.236991269871366, 2)
Current State,action,reward,Response time,Next State:  (2, 10.236991269871366) 3 27.0 2844.71300845 (10.236272697871373, 3)
Current State,action,reward,Response time,Next State:  (3, 10.236272697871373) 4 25.0 1913.60365711 (10.369891240151098, 5)
Current State,action,reward,Response time,Next State:  (5, 10.369891240151098) 3 24.0 1210.37343867 (10.316955310454549, 6)
Current State,action,reward,Response time,Next State:  (6, 10.316955310454549) 2 24.0 1079.75307088 (10.333617326102203, 6)
Current State,action,reward,Response time,Next State:  (6, 10.333617326102203) 1 25.0 1080.85508169 (10.390165524255663, 5)
Current State,action,reward,Response time,Next State:  (5, 10.390165524255663) 0 27.0 1212.23628493 (10.425974763084863, 3)
Action -2 not possible so Scaled down by 1
Current State,action,reward,Response time,Next State:  (3, 10.425974763084863) 0 -66.0817367172 1950.9125956 (10.546025383098053, 2)
Current State,action,reward,Response time,Next State:  (2, 10.546025383098053) 4 26.0 2940.81736717 (10.655373370049301, 4)
Current State,action,reward,Response time,Next State:  (4, 10.655373370049301) 1 27.0 1395.56706193 (10.624473674922116, 3)
Action -2 not possible so Scaled down by 1
Current State,action,reward,Response time,Next State:  (3, 10.624473674922116) 0 -73.0897887049 1989.95162075 (10.771376986314287, 2)
Current State,action,reward,Response time,Next State:  (2, 10.771376986314287) 4 26.0 3010.89788705 (10.924797168745895, 4)
Current State,action,reward,Response time,Next State:  (4, 10.924797168745895) 2 26.0 1426.17478823 (11.039747673816453, 4)
Current State,action,reward,Response time,Next State:  (4, 11.039747673816453) 3 25.0 1439.23367024 (11.271571944085663, 5)
Current State,action,reward,Response time,Next State:  (5, 11.271571944085663) 3 24.0 1293.22186438 (11.670334358779868, 6)
Current State,action,reward,Response time,Next State:  (6, 11.670334358779868) 2 24.0 1169.26435121 (11.819721938468785, 6)
Current State,action,reward,Response time,Next State:  (6, 11.819721938468785) 4 22.0 1179.14471218 (12.19918626616789, 8)
Current State,action,reward,Response time,Next State:  (8, 12.19918626616789) 3 21.0 1084.02242049 (12.501496275411796, 9)
Current State,action,reward,Response time,Next State:  (9, 12.501496275411796) 3 20.0 1077.50917513 (13.168618569876575, 10)
Current State,action,reward,Response time,Next State:  (10, 13.168618569876575) 1 21.0 1076.17782493 (13.649658108197247, 9)
Current State,action,reward,Response time,Next State:  (9, 13.649658108197247) 2 21.0 1137.6097809 (14.283719188889453, 9)
Current State,action,reward,Response time,Next State:  (9, 14.283719188889453) 1 22.0 1170.79974938 (14.677479537099185, 8)
Current State,action,reward,Response time,Next State:  (8, 14.677479537099185) 4 20.0 1228.86576266 (15.353965082180355, 10)
Current State,action,reward,Response time,Next State:  (10, 15.353965082180355) 3 19.0 1192.09754638 (15.836943704090487, 11)
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 4 19.0 1238.24711194 (16.871606159345866, 11)
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 0 21.0 1259.63387034 (17.534967586021782, 9)
Current State,action,reward,Response time,Next State:  (9, 17.534967586021782) 3 20.0 1340.98655806 (17.669285735563751, 10)
Current State,action,reward,Response time,Next State:  (10, 17.669285735563751) 2 20.0 1314.91162813 (17.944480812078613, 10)
Action +2 not possible so Scaled up by 1
Current State,action,reward,Response time,Next State:  (10, 17.944480812078613) 4 19.0 1329.50910109 (18.385807405229915, 11)
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 0 21.0 1339.64749699 (18.671267839956315, 9)
Current State,action,reward,Response time,Next State:  (9, 18.671267839956315) 4 19.0 1400.46626871 (19.02839494033929, 11)
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 1 20.0 1373.60319427 (19.286321916040979, 10)
Current State,action,reward,Response time,Next State:  (10, 19.286321916040979) 2 20.0 1400.68584406 (19.340464848017284, 10)
Current State,action,reward,Response time,Next State:  (10, 19.340464848017284) 0 22.0 1403.55780672 (19.213467265587269, 8)
Current State,action,reward,Response time,Next State:  (8, 19.213467265587269) 3 21.0 1493.97063558 (19.140765783401285, 9)
Current State,action,reward,Response time,Next State:  (9, 19.140765783401285) 0 23.0 1425.04216908 (19.385636054792762, 7)
Current State,action,reward,Response time,Next State:  (7, 19.385636054792762) 4 21.0 1583.87861729 (19.223969507401588, 9)
Current State,action,reward,Response time,Next State:  (9, 19.223969507401588) 2 21.0 1429.39747342 (19.25591252280865, 9)
Current State,action,reward,Response time,Next State:  (9, 19.25591252280865) 3 20.0 1431.06953264 (19.08360399753829, 10)
Current State,action,reward,Response time,Next State:  (10, 19.08360399753829) 3 19.0 1389.93285614 (18.668181536495972, 11)
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 0 21.0 1354.56874896 (18.375894992990247, 9)
Current State,action,reward,Response time,Next State:  (9, 18.375894992990247) 0 23.0 1385.00495784 (17.82724819986867, 7)
Current State,action,reward,Response time,Next State:  (7, 17.82724819986867) 1 24.0 1486.76498054 (17.229782241685768, 6)
Current State,action,reward,Response time,Next State:  (6, 17.229782241685768) 2 24.0 1536.96126404 (16.84211602880065, 6)
Current State,action,reward,Response time,Next State:  (6, 16.84211602880065) 3 23.0 1511.32136729 (16.237094554670044, 7)
Current State,action,reward,Response time,Next State:  (7, 16.237094554670044) 3 22.0 1387.67180358 (15.950694610794756, 8)
Current State,action,reward,Response time,Next State:  (8, 15.950694610794756) 0 24.0 1303.27855664 (15.828704162850809, 6)
Current State,action,reward,Response time,Next State:  (6, 15.828704162850809) 1 25.0 1444.29521247 (15.550833128512703, 5)
Current State,action,reward,Response time,Next State:  (5, 15.550833128512703) 2 25.0 1686.40988625 (15.446694946204717, 5)
Current State,action,reward,Response time,Next State:  (5, 15.446694946204717) 4 23.0 1676.84143877 (15.750501603468638, 7)
Current State,action,reward,Response time,Next State:  (7, 15.750501603468638) 1 24.0 1357.34892154 (15.817158911312735, 6)
Current State,action,reward,Response time,Next State:  (6, 15.817158911312735) 4 22.0 1443.53161985 (15.829956988360925, 8)
Current State,action,reward,Response time,Next State:  (8, 15.829956988360925) 4 20.0 1296.22207104 (15.892373986997768, 10)
Current State,action,reward,Response time,Next State:  (10, 15.892373986997768) 3 19.0 1220.65695786 (15.954793861767499, 11)
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 4 19.0 1211.18755114 (16.004586266677634, 11)
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 4 19.0 1214.51137704 (15.947547279389703, 11)
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 2 19.0 1210.80462626 (16.11465619633363, 11)
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 0 21.0 1219.63501821 (16.147078378791146, 9)
Current State,action,reward,Response time,Next State:  (9, 16.147078378791146) 1 22.0 1268.3374073 (16.229253414601111, 8)
Current State,action,reward,Response time,Next State:  (8, 16.229253414601111) 1 23.0 1319.55886882 (16.295120821876548, 7)
Current State,action,reward,Response time,Next State:  (7, 16.295120821876548) 1 24.0 1391.28781087 (16.667936385136993, 6)
Current State,action,reward,Response time,Next State:  (6, 16.667936385136993) 4 22.0 1499.80128138 (16.836383524612351, 8)
Current State,action,reward,Response time,Next State:  (8, 16.836383524612351) 2 22.0 1355.04246364 (16.845818065953559, 8)
Current State,action,reward,Response time,Next State:  (8, 16.845818065953559) 2 22.0 1355.59386347 (17.052961248403161, 8)
Current State,action,reward,Response time,Next State:  (8, 17.052961248403161) 3 21.0 1367.70030431 (17.215992726625572, 9)
############ Running episode number: 3  ##############
Action +2 not possible so Scaled up by 1
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 4 19.0 1029.06659875 (11.973514343585284, 11)
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 4 19.0 1000.80824137 (11.786394321941378, 11)
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 4 19.0 990.920419923 (11.61852219546234, 11)
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 0 21.0 982.049698353 (11.469111876584304, 9)
Current State,action,reward,Response time,Next State:  (9, 11.469111876584304) 1 22.0 1023.46894667 (11.336751742492702, 8)
Current State,action,reward,Response time,Next State:  (8, 11.336751742492702) 4 20.0 1033.61761156 (11.25610796929319, 10)
Current State,action,reward,Response time,Next State:  (10, 11.25610796929319) 1 21.0 974.730436685 (11.027107764209074, 9)
Current State,action,reward,Response time,Next State:  (9, 11.027107764209074) 0 23.0 1000.33221268 (10.995673623987257, 7)
Current State,action,reward,Response time,Next State:  (7, 10.995673623987257) 2 23.0 1061.04358539 (10.931193889570471, 7)
Current State,action,reward,Response time,Next State:  (7, 10.931193889570471) 3 22.0 1057.02541913 (10.816918347608043, 8)
Current State,action,reward,Response time,Next State:  (8, 10.816918347608043) 4 20.0 1003.23605536 (10.819208572963639, 10)
Current State,action,reward,Response time,Next State:  (10, 10.819208572963639) 1 21.0 951.555504911 (10.768325938188134, 9)
Current State,action,reward,Response time,Next State:  (9, 10.768325938188134) 2 21.0 986.786261176 (10.772009508959538, 9)
Current State,action,reward,Response time,Next State:  (9, 10.772009508959538) 2 21.0 986.979077927 (10.644925616761762, 9)
Current State,action,reward,Response time,Next State:  (9, 10.644925616761762) 4 19.0 980.32686333 (10.58735855349979, 11)
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 4 19.0 927.560809977 (10.552868829802469, 11)
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 4 19.0 925.738299342 (10.553846649940214, 11)
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 2 19.0 925.789969445 (10.489125480251131, 11)
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 4 19.0 922.369964659 (10.448897752470936, 11)
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 0 21.0 919.412094444 (10.44185150623065, 9)
Current State,action,reward,Response time,Next State:  (9, 10.44185150623065) 0 23.0 969.696935814 (10.370942817486826, 7)
Current State,action,reward,Response time,Next State:  (7, 10.370942817486826) 3 22.0 1022.11240377 (10.42733414151318, 8)
Current State,action,reward,Response time,Next State:  (8, 10.42733414151318) 0 24.0 980.466886297 (10.388469398680568, 6)
Current State,action,reward,Response time,Next State:  (6, 10.388469398680568) 0 26.0 1084.48294874 (10.344006106602812, 4)
Current State,action,reward,Response time,Next State:  (4, 10.344006106602812) 4 24.0 1360.1943739 (10.319026962956018, 6)
Current State,action,reward,Response time,Next State:  (6, 10.319026962956018) 0 26.0 1079.89008812 (10.30224719189987, 4)
Current State,action,reward,Response time,Next State:  (4, 10.30224719189987) 3 25.0 1355.45037786 (10.278181486298042, 5)
Current State,action,reward,Response time,Next State:  (5, 10.278181486298042) 2 25.0 1201.9469428 (10.268274366284802, 5)
Current State,action,reward,Response time,Next State:  (5, 10.268274366284802) 2 25.0 1201.03665461 (10.335411397720526, 5)
Current State,action,reward,Response time,Next State:  (5, 10.335411397720526) 3 24.0 1207.20535417 (10.305649118067803, 6)
Current State,action,reward,Response time,Next State:  (6, 10.305649118067803) 4 22.0 1079.00528942 (10.24826025489064, 8)
Current State,action,reward,Response time,Next State:  (8, 10.24826025489064) 3 21.0 970.000949704 (10.276491935146446, 9)
Current State,action,reward,Response time,Next State:  (9, 10.276491935146446) 1 22.0 961.041178317 (10.236991269871366, 8)
Current State,action,reward,Response time,Next State:  (8, 10.236991269871366) 3 21.0 969.342336184 (10.236272697871373, 9)
Current State,action,reward,Response time,Next State:  (9, 10.236272697871373) 0 23.0 958.935899728 (10.369891240151098, 7)
Current State,action,reward,Response time,Next State:  (7, 10.369891240151098) 2 23.0 1022.04687291 (10.316955310454549, 7)
Current State,action,reward,Response time,Next State:  (7, 10.316955310454549) 1 24.0 1018.74807882 (10.333617326102203, 6)
Current State,action,reward,Response time,Next State:  (6, 10.333617326102203) 3 23.0 1080.85508169 (10.390165524255663, 7)
Current State,action,reward,Response time,Next State:  (7, 10.390165524255663) 4 21.0 1023.31030002 (10.425974763084863, 9)
Current State,action,reward,Response time,Next State:  (9, 10.425974763084863) 1 22.0 968.865866662 (10.546025383098053, 8)
Current State,action,reward,Response time,Next State:  (8, 10.546025383098053) 1 23.0 987.40377158 (10.655373370049301, 7)
Current State,action,reward,Response time,Next State:  (7, 10.655373370049301) 0 25.0 1039.83718601 (10.624473674922116, 5)
Current State,action,reward,Response time,Next State:  (5, 10.624473674922116) 2 25.0 1233.76503821 (10.771376986314287, 5)
Current State,action,reward,Response time,Next State:  (5, 10.771376986314287) 1 26.0 1247.26284073 (10.924797168745895, 4)
Current State,action,reward,Response time,Next State:  (4, 10.924797168745895) 3 25.0 1426.17478823 (11.039747673816453, 5)
Current State,action,reward,Response time,Next State:  (5, 11.039747673816453) 3 24.0 1271.92133555 (11.271571944085663, 6)
Current State,action,reward,Response time,Next State:  (6, 11.271571944085663) 3 23.0 1142.8905616 (11.670334358779868, 7)
Current State,action,reward,Response time,Next State:  (7, 11.670334358779868) 3 22.0 1103.08623692 (11.819721938468785, 8)
Current State,action,reward,Response time,Next State:  (8, 11.819721938468785) 0 24.0 1061.84470565 (12.19918626616789, 6)
Current State,action,reward,Response time,Next State:  (6, 12.19918626616789) 4 22.0 1204.24214357 (12.501496275411796, 8)
Current State,action,reward,Response time,Next State:  (8, 12.501496275411796) 0 24.0 1101.69086701 (13.168618569876575, 6)
Current State,action,reward,Response time,Next State:  (6, 13.168618569876575) 0 26.0 1268.35952947 (13.649658108197247, 4)
Current State,action,reward,Response time,Next State:  (4, 13.649658108197247) 2 26.0 1735.73095309 (14.283719188889453, 4)
Current State,action,reward,Response time,Next State:  (4, 14.283719188889453) 0 -194.562941566 1807.76307539 (14.677479537099185, 2)
Current State,action,reward,Response time,Next State:  (2, 14.677479537099185) 2 -215.600492669 4225.62941566 (15.353965082180355, 2)
Action -2 not possible so Scaled down by 0
Current State,action,reward,Response time,Next State:  (2, 15.353965082180355) 1 -230.62030723 4436.00492669 (15.836943704090487, 2)
Current State,action,reward,Response time,Next State:  (2, 15.836943704090487) 4 20.4220625085 4586.2030723 (16.466876895473597, 4)
Current State,action,reward,Response time,Next State:  (4, 16.466876895473597) 3 25.0 2055.77937492 (16.871606159345866, 5)
Current State,action,reward,Response time,Next State:  (5, 16.871606159345866) 0 -107.904695815 1807.76544576 (17.534967586021782, 3)
Current State,action,reward,Response time,Next State:  (3, 17.534967586021782) 1 -287.60303126 3349.04695815 (17.669285735563751, 2)
Action -2 not possible so Scaled down by 0
Current State,action,reward,Response time,Next State:  (2, 17.669285735563751) 1 -296.161130583 5156.0303126 (17.944480812078613, 2)
Action -2 not possible so Scaled down by 0
Current State,action,reward,Response time,Next State:  (2, 17.944480812078613) 0 -309.885637816 5241.61130583 (18.385807405229915, 2)
Current State,action,reward,Response time,Next State:  (2, 18.385807405229915) 2 -318.762972339 5378.85637816 (18.671267839956315, 2)
Current State,action,reward,Response time,Next State:  (2, 18.671267839956315) 3 -137.276115284 5467.62972339 (19.02839494033929, 3)
Current State,action,reward,Response time,Next State:  (3, 19.02839494033929) 4 22.0365107433 3642.76115284 (19.286321916040979, 5)
Current State,action,reward,Response time,Next State:  (5, 19.286321916040979) 2 21.5390334592 2029.63489257 (19.340464848017284, 5)
Current State,action,reward,Response time,Next State:  (5, 19.340464848017284) 3 24.0 2034.60966541 (19.213467265587269, 6)
Current State,action,reward,Response time,Next State:  (6, 19.213467265587269) 4 22.0 1668.16041811 (19.140765783401285, 8)
Current State,action,reward,Response time,Next State:  (8, 19.140765783401285) 1 23.0 1489.72161235 (19.385636054792762, 7)
Current State,action,reward,Response time,Next State:  (7, 19.385636054792762) 0 22.6094185224 1583.87861729 (19.223969507401588, 5)
Current State,action,reward,Response time,Next State:  (5, 19.223969507401588) 3 24.0 2023.90581478 (19.25591252280865, 6)
Current State,action,reward,Response time,Next State:  (6, 19.25591252280865) 0 -9.30510611419 1670.96770947 (19.08360399753829, 4)
Current State,action,reward,Response time,Next State:  (4, 19.08360399753829) 2 -4.58572458629 2353.05106114 (18.668181536495972, 4)
Current State,action,reward,Response time,Next State:  (4, 18.668181536495972) 1 -124.443318562 2305.85724586 (18.375894992990247, 3)
Current State,action,reward,Response time,Next State:  (3, 18.375894992990247) 1 -292.515395521 3514.43318562 (17.82724819986867, 2)
Action -2 not possible so Scaled down by 0
Current State,action,reward,Response time,Next State:  (2, 17.82724819986867) 0 -273.935219316 5205.15395521 (17.229782241685768, 2)
Current State,action,reward,Response time,Next State:  (2, 17.229782241685768) 3 -94.2782990187 5019.35219316 (16.84211602880065, 3)
Action -2 not possible so Scaled down by 1
Current State,action,reward,Response time,Next State:  (3, 16.84211602880065) 0 -243.064318827 3212.78299019 (16.237094554670044, 2)
Current State,action,reward,Response time,Next State:  (2, 16.237094554670044) 2 -234.157767166 4710.64318827 (15.950694610794756, 2)
Action -2 not possible so Scaled down by 0
Current State,action,reward,Response time,Next State:  (2, 15.950694610794756) 0 -230.364071496 4621.57767166 (15.828704162850809, 2)
Current State,action,reward,Response time,Next State:  (2, 15.828704162850809) 4 26.0 4583.64071496 (15.550833128512703, 4)
Current State,action,reward,Response time,Next State:  (4, 15.550833128512703) 2 26.0 1951.71278156 (15.446694946204717, 4)
Current State,action,reward,Response time,Next State:  (4, 15.446694946204717) 1 -72.8093838471 1939.88222688 (15.750501603468638, 3)
Current State,action,reward,Response time,Next State:  (3, 15.750501603468638) 3 26.0 2998.09383847 (15.817158911312735, 4)
Current State,action,reward,Response time,Next State:  (4, 15.817158911312735) 0 -230.403032241 1981.96855965 (15.829956988360925, 2)
Action -2 not possible so Scaled down by 0
Current State,action,reward,Response time,Next State:  (2, 15.829956988360925) 0 -232.344094852 4584.03032241 (15.892373986997768, 2)
Current State,action,reward,Response time,Next State:  (2, 15.892373986997768) 3 -76.827224819 4603.44094852 (15.954793861767499, 3)
Action -2 not possible so Scaled down by 1
Current State,action,reward,Response time,Next State:  (3, 15.954793861767499) 0 -235.833706102 3038.27224819 (16.004586266677634, 2)
Current State,action,reward,Response time,Next State:  (2, 16.004586266677634) 2 -236.241362763 4638.33706102 (16.017694914042416, 2)
Action -2 not possible so Scaled down by 0
Current State,action,reward,Response time,Next State:  (2, 16.017694914042416) 1 -234.059890506 4642.41362763 (15.947547279389703, 2)
Current State,action,reward,Response time,Next State:  (2, 15.947547279389703) 3 -79.971257016 4620.59890506 (16.11465619633363, 3)
Action -2 not possible so Scaled down by 1
Current State,action,reward,Response time,Next State:  (3, 16.11465619633363) 0 -240.264968694 3069.71257016 (16.147078378791146, 2)
Current State,action,reward,Response time,Next State:  (2, 16.147078378791146) 4 23.1215695679 4682.64968694 (16.229253414601111, 4)
Current State,action,reward,Response time,Next State:  (4, 16.229253414601111) 2 22.3732869054 2028.78430432 (16.295120821876548, 4)
Current State,action,reward,Response time,Next State:  (4, 16.295120821876548) 4 24.0 2036.26713095 (16.667936385136993, 6)
Current State,action,reward,Response time,Next State:  (6, 16.667936385136993) 1 25.0 1499.80128138 (16.836383524612351, 5)
Current State,action,reward,Response time,Next State:  (5, 16.836383524612351) 4 23.0 1804.52911185 (16.845818065953559, 7)
Current State,action,reward,Response time,Next State:  (7, 16.845818065953559) 4 21.0 1425.60546289 (17.052961248403161, 9)
Current State,action,reward,Response time,Next State:  (9, 17.052961248403161) 1 22.0 1315.75590499 (17.215992726625572, 8)
############ Running episode number: 4  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 0 21.0 1000.80824137 (11.786394321941378, 9)
Current State,action,reward,Response time,Next State:  (9, 11.786394321941378) 2 21.0 1040.0771169 (11.61852219546234, 9)
Current State,action,reward,Response time,Next State:  (9, 11.61852219546234) 3 20.0 1031.28983953 (11.469111876584304, 10)
Current State,action,reward,Response time,Next State:  (10, 11.469111876584304) 0 22.0 986.02903554 (11.336751742492702, 8)
Current State,action,reward,Response time,Next State:  (8, 11.336751742492702) 3 21.0 1033.61761156 (11.25610796929319, 9)
Current State,action,reward,Response time,Next State:  (9, 11.25610796929319) 4 19.0 1012.3192433 (11.027107764209074, 11)
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 2 19.0 950.798097136 (10.995673623987257, 11)
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 4 19.0 949.137050055 (10.931193889570471, 11)
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 1 20.0 939.691239608 (10.819208572963639, 10)
Current State,action,reward,Response time,Next State:  (10, 10.819208572963639) 2 20.0 951.555504911 (10.768325938188134, 10)
Current State,action,reward,Response time,Next State:  (10, 10.768325938188134) 3 19.0 948.856481751 (10.772009508959538, 11)
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 1 20.0 937.318160694 (10.644925616761762, 10)
Action +2 not possible so Scaled up by 1
Current State,action,reward,Response time,Next State:  (10, 10.644925616761762) 4 19.0 942.310823749 (10.58735855349979, 11)
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 0 21.0 927.560809977 (10.552868829802469, 9)
Current State,action,reward,Response time,Next State:  (9, 10.552868829802469) 4 19.0 975.508144832 (10.553846649940214, 11)
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 0 21.0 925.789969445 (10.489125480251131, 9)
Current State,action,reward,Response time,Next State:  (9, 10.489125480251131) 3 20.0 972.171495057 (10.448897752470936, 10)
Action +2 not possible so Scaled up by 1
Current State,action,reward,Response time,Next State:  (10, 10.448897752470936) 4 19.0 931.912703681 (10.433149880183072, 11)
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 1 20.0 919.412094444 (10.44185150623065, 10)
Current State,action,reward,Response time,Next State:  (10, 10.44185150623065) 2 20.0 931.538941947 (10.370942817486826, 10)
Action +2 not possible so Scaled up by 1
Current State,action,reward,Response time,Next State:  (10, 10.370942817486826) 4 19.0 927.777654938 (10.42733414151318, 11)
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 2 19.0 917.051082408 (10.344006106602812, 11)
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 2 19.0 914.701547126 (10.319026962956018, 11)
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 0 21.0 913.381595845 (10.30224719189987, 9)
Current State,action,reward,Response time,Next State:  (9, 10.30224719189987) 0 23.0 962.389338906 (10.278181486298042, 7)
Current State,action,reward,Response time,Next State:  (7, 10.278181486298042) 2 23.0 1016.33182085 (10.268274366284802, 7)
Current State,action,reward,Response time,Next State:  (7, 10.268274366284802) 2 23.0 1015.71444152 (10.335411397720526, 7)
Current State,action,reward,Response time,Next State:  (7, 10.335411397720526) 3 22.0 1019.89820185 (10.305649118067803, 8)
Current State,action,reward,Response time,Next State:  (8, 10.305649118067803) 0 24.0 973.355030047 (10.24826025489064, 6)
Current State,action,reward,Response time,Next State:  (6, 10.24826025489064) 3 23.0 1075.20964131 (10.276491935146446, 7)
Current State,action,reward,Response time,Next State:  (7, 10.276491935146446) 3 22.0 1016.22653355 (10.236991269871366, 8)
Current State,action,reward,Response time,Next State:  (8, 10.236991269871366) 2 22.0 969.342336184 (10.236272697871373, 8)
Current State,action,reward,Response time,Next State:  (8, 10.236272697871373) 1 23.0 969.300339391 (10.369891240151098, 7)
Current State,action,reward,Response time,Next State:  (7, 10.369891240151098) 0 25.0 1022.04687291 (10.316955310454549, 5)
Current State,action,reward,Response time,Next State:  (5, 10.316955310454549) 0 27.0 1205.50956788 (10.333617326102203, 3)
Action -2 not possible so Scaled down by 1
Current State,action,reward,Response time,Next State:  (3, 10.333617326102203) 0 -61.2347599131 1932.74854487 (10.390165524255663, 2)
Current State,action,reward,Response time,Next State:  (2, 10.390165524255663) 3 27.0 2892.34759913 (10.425974763084863, 3)
Current State,action,reward,Response time,Next State:  (3, 10.425974763084863) 2 27.0 1950.9125956 (10.546025383098053, 3)
Current State,action,reward,Response time,Next State:  (3, 10.546025383098053) 3 26.0 1974.52309871 (10.655373370049301, 4)
Current State,action,reward,Response time,Next State:  (4, 10.655373370049301) 2 26.0 1395.56706193 (10.624473674922116, 4)
Current State,action,reward,Response time,Next State:  (4, 10.624473674922116) 1 25.1156724308 1392.05672091 (10.771376986314287, 3)
Current State,action,reward,Response time,Next State:  (3, 10.771376986314287) 4 25.0 2018.84327569 (10.924797168745895, 5)
Current State,action,reward,Response time,Next State:  (5, 10.924797168745895) 0 19.8375931101 1261.35942784 (11.039747673816453, 3)
Current State,action,reward,Response time,Next State:  (3, 11.039747673816453) 3 26.0 2071.6240689 (11.271571944085663, 4)
Current State,action,reward,Response time,Next State:  (4, 11.271571944085663) 3 25.0 1465.56992505 (11.670334358779868, 5)
Current State,action,reward,Response time,Next State:  (5, 11.670334358779868) 4 23.0 1329.8610407 (11.819721938468785, 7)
Current State,action,reward,Response time,Next State:  (7, 11.819721938468785) 4 21.0 1112.39558253 (12.19918626616789, 9)
Current State,action,reward,Response time,Next State:  (9, 12.19918626616789) 3 20.0 1061.68473805 (12.501496275411796, 10)
Current State,action,reward,Response time,Next State:  (10, 12.501496275411796) 0 22.0 1040.79092857 (13.168618569876575, 8)
Current State,action,reward,Response time,Next State:  (8, 13.168618569876575) 4 20.0 1140.68069275 (13.649658108197247, 10)
Current State,action,reward,Response time,Next State:  (10, 13.649658108197247) 3 19.0 1101.69413046 (14.283719188889453, 11)
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 2 19.0 1122.88439768 (14.677479537099185, 11)
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 0 21.0 1143.69153516 (15.353965082180355, 9)
Current State,action,reward,Response time,Next State:  (9, 15.353965082180355) 0 23.0 1226.82184023 (15.836943704090487, 7)
Current State,action,reward,Response time,Next State:  (7, 15.836943704090487) 3 22.0 1362.73571067 (16.466876895473597, 8)
Current State,action,reward,Response time,Next State:  (8, 16.466876895473597) 1 23.0 1333.44672445 (16.871606159345866, 7)
Current State,action,reward,Response time,Next State:  (7, 16.871606159345866) 0 25.0 1427.21249257 (17.534967586021782, 5)
Current State,action,reward,Response time,Next State:  (5, 17.534967586021782) 2 25.0 1868.71656697 (17.669285735563751, 5)
Current State,action,reward,Response time,Next State:  (5, 17.669285735563751) 0 -115.958642802 1881.05801687 (17.944480812078613, 3)
Action -2 not possible so Scaled down by 1
Current State,action,reward,Response time,Next State:  (3, 17.944480812078613) 0 -309.885637816 3429.58642802 (18.385807405229915, 2)
Current State,action,reward,Response time,Next State:  (2, 18.385807405229915) 3 -130.252452674 5378.85637816 (18.671267839956315, 3)
Action -2 not possible so Scaled down by 1
Current State,action,reward,Response time,Next State:  (3, 18.671267839956315) 0 -329.869018402 3572.52452674 (19.02839494033929, 2)
Current State,action,reward,Response time,Next State:  (2, 19.02839494033929) 4 -11.6080706615 5578.69018402 (19.286321916040979, 4)
Current State,action,reward,Response time,Next State:  (4, 19.286321916040979) 2 -12.2231581375 2376.08070662 (19.340464848017284, 4)
Current State,action,reward,Response time,Next State:  (4, 19.340464848017284) 1 -140.915955469 2382.23158138 (19.213467265587269, 3)
Current State,action,reward,Response time,Next State:  (3, 19.213467265587269) 3 -9.95448909562 3679.15955469 (19.140765783401285, 4)
Current State,action,reward,Response time,Next State:  (4, 19.140765783401285) 4 24.0 2359.54489096 (19.385636054792762, 6)
Current State,action,reward,Response time,Next State:  (6, 19.385636054792762) 3 23.0 1679.54750791 (19.223969507401588, 7)
Current State,action,reward,Response time,Next State:  (7, 19.223969507401588) 0 22.3159190023 1573.80408654 (19.25591252280865, 5)
Current State,action,reward,Response time,Next State:  (5, 19.25591252280865) 3 24.0 2026.84080998 (19.08360399753829, 6)
Current State,action,reward,Response time,Next State:  (6, 19.08360399753829) 1 25.0 1659.57137766 (18.668181536495972, 5)
Current State,action,reward,Response time,Next State:  (5, 18.668181536495972) 2 25.0 1972.8387816 (18.375894992990247, 5)
Current State,action,reward,Response time,Next State:  (5, 18.375894992990247) 0 -113.653014598 1945.98284482 (17.82724819986867, 3)
Current State,action,reward,Response time,Next State:  (3, 17.82724819986867) 4 25.0 3406.53014598 (17.229782241685768, 5)
Current State,action,reward,Response time,Next State:  (5, 17.229782241685768) 2 25.0 1840.67545971 (16.84211602880065, 5)
Current State,action,reward,Response time,Next State:  (5, 16.84211602880065) 2 25.0 1805.05582707 (16.237094554670044, 5)
Current State,action,reward,Response time,Next State:  (5, 16.237094554670044) 2 25.0 1749.46511027 (15.950694610794756, 5)
Current State,action,reward,Response time,Next State:  (5, 15.950694610794756) 4 23.0 1723.15004733 (15.828704162850809, 7)
Current State,action,reward,Response time,Next State:  (7, 15.828704162850809) 3 22.0 1362.22224939 (15.550833128512703, 8)
Current State,action,reward,Response time,Next State:  (8, 15.550833128512703) 2 22.0 1279.90873428 (15.446694946204717, 8)
Current State,action,reward,Response time,Next State:  (8, 15.446694946204717) 2 22.0 1273.82239956 (15.750501603468638, 8)
Current State,action,reward,Response time,Next State:  (8, 15.750501603468638) 2 22.0 1291.57831736 (15.817158911312735, 8)
Current State,action,reward,Response time,Next State:  (8, 15.817158911312735) 3 21.0 1295.47409005 (15.829956988360925, 9)
Current State,action,reward,Response time,Next State:  (9, 15.829956988360925) 1 22.0 1251.7376675 (15.892373986997768, 8)
Current State,action,reward,Response time,Next State:  (8, 15.892373986997768) 4 20.0 1299.87001973 (15.954793861767499, 10)
Current State,action,reward,Response time,Next State:  (10, 15.954793861767499) 1 21.0 1223.96796344 (16.004586266677634, 9)
Current State,action,reward,Response time,Next State:  (9, 16.004586266677634) 1 22.0 1260.87864843 (16.017694914042416, 8)
Current State,action,reward,Response time,Next State:  (8, 16.017694914042416) 4 20.0 1307.19437562 (15.947547279389703, 10)
Current State,action,reward,Response time,Next State:  (10, 15.947547279389703) 2 20.0 1223.58357506 (16.11465619633363, 10)
Current State,action,reward,Response time,Next State:  (10, 16.11465619633363) 3 19.0 1232.44771583 (16.147078378791146, 11)
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 2 19.0 1221.34827555 (16.229253414601111, 11)
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 0 21.0 1225.69057988 (16.295120821876548, 9)
Current State,action,reward,Response time,Next State:  (9, 16.295120821876548) 1 22.0 1276.0866986 (16.667936385136993, 8)
Current State,action,reward,Response time,Next State:  (8, 16.667936385136993) 1 23.0 1345.1976051 (16.836383524612351, 7)
Current State,action,reward,Response time,Next State:  (7, 16.836383524612351) 4 21.0 1425.01753312 (16.845818065953559, 9)
Current State,action,reward,Response time,Next State:  (9, 16.845818065953559) 4 19.0 1304.91298164 (17.052961248403161, 11)
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 1 20.0 1269.21706044 (17.215992726625572, 10)
############ Running episode number: 5  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 0 22.0 1029.06659875 (11.973514343585284, 8)
Current State,action,reward,Response time,Next State:  (8, 11.973514343585284) 3 21.0 1070.83307124 (11.786394321941378, 9)
Current State,action,reward,Response time,Next State:  (9, 11.786394321941378) 0 23.0 1040.0771169 (11.61852219546234, 7)
Current State,action,reward,Response time,Next State:  (7, 11.61852219546234) 3 22.0 1099.85747227 (11.469111876584304, 8)
Current State,action,reward,Response time,Next State:  (8, 11.469111876584304) 0 24.0 1041.35337246 (11.336751742492702, 6)
Current State,action,reward,Response time,Next State:  (6, 11.336751742492702) 3 23.0 1147.20149519 (11.25610796929319, 7)
Current State,action,reward,Response time,Next State:  (7, 11.25610796929319) 2 23.0 1077.27300243 (11.027107764209074, 7)
Current State,action,reward,Response time,Next State:  (7, 11.027107764209074) 1 24.0 1063.00245825 (10.995673623987257, 6)
Current State,action,reward,Response time,Next State:  (6, 10.995673623987257) 3 23.0 1124.64289336 (10.931193889570471, 7)
Current State,action,reward,Response time,Next State:  (7, 10.931193889570471) 0 25.0 1057.02541913 (10.816918347608043, 5)
Current State,action,reward,Response time,Next State:  (5, 10.816918347608043) 4 23.0 1251.44728215 (10.819208572963639, 7)
Current State,action,reward,Response time,Next State:  (7, 10.819208572963639) 4 21.0 1050.04686027 (10.768325938188134, 9)
Current State,action,reward,Response time,Next State:  (9, 10.768325938188134) 4 19.0 986.786261176 (10.772009508959538, 11)
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 2 19.0 937.318160694 (10.644925616761762, 11)
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 1 20.0 930.602776506 (10.58735855349979, 10)
Current State,action,reward,Response time,Next State:  (10, 10.58735855349979) 1 21.0 939.257231149 (10.552868829802469, 9)
Current State,action,reward,Response time,Next State:  (9, 10.552868829802469) 2 21.0 975.508144832 (10.553846649940214, 9)
Current State,action,reward,Response time,Next State:  (9, 10.553846649940214) 0 23.0 975.559328891 (10.489125480251131, 7)
Current State,action,reward,Response time,Next State:  (7, 10.489125480251131) 1 24.0 1029.47716098 (10.448897752470936, 6)
Current State,action,reward,Response time,Next State:  (6, 10.448897752470936) 4 22.0 1088.47962603 (10.433149880183072, 8)
Current State,action,reward,Response time,Next State:  (8, 10.433149880183072) 2 22.0 980.806785952 (10.44185150623065, 8)
Current State,action,reward,Response time,Next State:  (8, 10.44185150623065) 3 21.0 981.315350702 (10.370942817486826, 9)
Current State,action,reward,Response time,Next State:  (9, 10.370942817486826) 1 22.0 965.985215893 (10.42733414151318, 8)
Current State,action,reward,Response time,Next State:  (8, 10.42733414151318) 1 23.0 980.466886297 (10.388469398680568, 7)
Current State,action,reward,Response time,Next State:  (7, 10.388469398680568) 3 22.0 1023.20460302 (10.344006106602812, 8)
Current State,action,reward,Response time,Next State:  (8, 10.344006106602812) 0 24.0 975.596796379 (10.319026962956018, 6)
Current State,action,reward,Response time,Next State:  (6, 10.319026962956018) 4 22.0 1079.89008812 (10.30224719189987, 8)
Current State,action,reward,Response time,Next State:  (8, 10.30224719189987) 3 21.0 973.15620517 (10.278181486298042, 9)
Current State,action,reward,Response time,Next State:  (9, 10.278181486298042) 2 21.0 961.129617982 (10.268274366284802, 9)
Current State,action,reward,Response time,Next State:  (9, 10.268274366284802) 0 23.0 960.611029141 (10.335411397720526, 7)
Current State,action,reward,Response time,Next State:  (7, 10.335411397720526) 1 24.0 1019.89820185 (10.305649118067803, 6)
Current State,action,reward,Response time,Next State:  (6, 10.305649118067803) 2 24.0 1079.00528942 (10.24826025489064, 6)
Current State,action,reward,Response time,Next State:  (6, 10.24826025489064) 3 23.0 1075.20964131 (10.276491935146446, 7)
Current State,action,reward,Response time,Next State:  (7, 10.276491935146446) 0 25.0 1016.22653355 (10.236991269871366, 5)
Current State,action,reward,Response time,Next State:  (5, 10.236991269871366) 1 26.0 1198.16229423 (10.236272697871373, 4)
Current State,action,reward,Response time,Next State:  (4, 10.236272697871373) 4 24.0 1347.95538572 (10.369891240151098, 6)
Current State,action,reward,Response time,Next State:  (6, 10.369891240151098) 0 26.0 1083.25420594 (10.316955310454549, 4)
Current State,action,reward,Response time,Next State:  (4, 10.316955310454549) 2 26.0 1357.1212847 (10.333617326102203, 4)
Current State,action,reward,Response time,Next State:  (4, 10.333617326102203) 0 -61.2347599131 1359.01416283 (10.390165524255663, 2)
Current State,action,reward,Response time,Next State:  (2, 10.390165524255663) 2 -62.3483664008 2892.34759913 (10.425974763084863, 2)
Current State,action,reward,Response time,Next State:  (2, 10.425974763084863) 3 27.0 2903.48366401 (10.546025383098053, 3)
Current State,action,reward,Response time,Next State:  (3, 10.546025383098053) 4 25.0 1974.52309871 (10.655373370049301, 5)
Current State,action,reward,Response time,Next State:  (5, 10.655373370049301) 1 26.0 1236.60417082 (10.624473674922116, 4)
Current State,action,reward,Response time,Next State:  (4, 10.624473674922116) 3 25.0 1392.05672091 (10.771376986314287, 5)
Current State,action,reward,Response time,Next State:  (5, 10.771376986314287) 0 22.0983388322 1247.26284073 (10.924797168745895, 3)
Current State,action,reward,Response time,Next State:  (3, 10.924797168745895) 2 19.8375931101 2049.01661168 (11.039747673816453, 3)
Current State,action,reward,Response time,Next State:  (3, 11.039747673816453) 4 25.0 2071.6240689 (11.271571944085663, 5)
Current State,action,reward,Response time,Next State:  (5, 11.271571944085663) 3 24.0 1293.22186438 (11.670334358779868, 6)
Current State,action,reward,Response time,Next State:  (6, 11.670334358779868) 4 22.0 1169.26435121 (11.819721938468785, 8)
Current State,action,reward,Response time,Next State:  (8, 11.819721938468785) 4 20.0 1061.84470565 (12.19918626616789, 10)
Current State,action,reward,Response time,Next State:  (10, 12.19918626616789) 3 19.0 1024.75516863 (12.501496275411796, 11)
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 2 19.0 1028.70793389 (13.168618569876575, 11)
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 4 19.0 1063.96010023 (13.649658108197247, 11)
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 0 21.0 1089.37925646 (14.283719188889453, 9)
Current State,action,reward,Response time,Next State:  (9, 14.283719188889453) 0 23.0 1170.79974938 (14.677479537099185, 7)
Current State,action,reward,Response time,Next State:  (7, 14.677479537099185) 2 23.0 1290.48169407 (15.353965082180355, 7)
Current State,action,reward,Response time,Next State:  (7, 15.353965082180355) 3 22.0 1332.63806181 (15.836943704090487, 8)
Current State,action,reward,Response time,Next State:  (8, 15.836943704090487) 2 22.0 1296.63040821 (16.466876895473597, 8)
Current State,action,reward,Response time,Next State:  (8, 16.466876895473597) 3 21.0 1333.44672445 (16.871606159345866, 9)
Current State,action,reward,Response time,Next State:  (9, 16.871606159345866) 2 21.0 1306.26286107 (17.534967586021782, 9)
Current State,action,reward,Response time,Next State:  (9, 17.534967586021782) 2 21.0 1340.98655806 (17.669285735563751, 9)
Current State,action,reward,Response time,Next State:  (9, 17.669285735563751) 1 22.0 1348.01745033 (17.944480812078613, 8)
Current State,action,reward,Response time,Next State:  (8, 17.944480812078613) 0 24.0 1419.80498244 (18.385807405229915, 6)
Current State,action,reward,Response time,Next State:  (6, 18.385807405229915) 0 -4.62078634849 1613.41973487 (18.671267839956315, 4)
Current State,action,reward,Response time,Next State:  (4, 18.671267839956315) 2 -8.67790698763 2306.20786348 (19.02839494033929, 4)
Current State,action,reward,Response time,Next State:  (4, 19.02839494033929) 3 22.0365107433 2346.77906988 (19.286321916040979, 5)
Current State,action,reward,Response time,Next State:  (5, 19.286321916040979) 2 21.5390334592 2029.63489257 (19.340464848017284, 5)
Current State,action,reward,Response time,Next State:  (5, 19.340464848017284) 4 23.0 2034.60966541 (19.213467265587269, 7)
Current State,action,reward,Response time,Next State:  (7, 19.213467265587269) 3 22.0 1573.14962117 (19.140765783401285, 8)
Current State,action,reward,Response time,Next State:  (8, 19.140765783401285) 1 23.0 1489.72161235 (19.385636054792762, 7)
Current State,action,reward,Response time,Next State:  (7, 19.385636054792762) 1 24.0 1583.87861729 (19.223969507401588, 6)
Current State,action,reward,Response time,Next State:  (6, 19.223969507401588) 4 22.0 1668.85502699 (19.25591252280865, 8)
Current State,action,reward,Response time,Next State:  (8, 19.25591252280865) 3 21.0 1496.45133993 (19.08360399753829, 9)
Current State,action,reward,Response time,Next State:  (9, 19.08360399753829) 3 20.0 1422.05003169 (18.668181536495972, 10)
Current State,action,reward,Response time,Next State:  (10, 18.668181536495972) 1 21.0 1367.89714889 (18.375894992990247, 9)
Current State,action,reward,Response time,Next State:  (9, 18.375894992990247) 0 23.0 1385.00495784 (17.82724819986867, 7)
Current State,action,reward,Response time,Next State:  (7, 17.82724819986867) 2 23.0 1486.76498054 (17.229782241685768, 7)
Current State,action,reward,Response time,Next State:  (7, 17.229782241685768) 4 21.0 1449.53285514 (16.84211602880065, 9)
Current State,action,reward,Response time,Next State:  (9, 16.84211602880065) 2 21.0 1304.71919827 (16.237094554670044, 9)
Current State,action,reward,Response time,Next State:  (9, 16.237094554670044) 4 19.0 1273.04930988 (15.950694610794756, 11)
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 1 20.0 1210.97093797 (15.828704162850809, 10)
Action +2 not possible so Scaled up by 1
Current State,action,reward,Response time,Next State:  (10, 15.828704162850809) 4 19.0 1217.27964986 (15.550833128512703, 11)
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 0 21.0 1189.84140354 (15.446694946204717, 9)
Current State,action,reward,Response time,Next State:  (9, 15.446694946204717) 0 23.0 1231.67579099 (15.750501603468638, 7)
Current State,action,reward,Response time,Next State:  (7, 15.750501603468638) 3 22.0 1357.34892154 (15.817158911312735, 8)
Current State,action,reward,Response time,Next State:  (8, 15.817158911312735) 2 22.0 1295.47409005 (15.829956988360925, 8)
Current State,action,reward,Response time,Next State:  (8, 15.829956988360925) 1 23.0 1296.22207104 (15.892373986997768, 7)
Current State,action,reward,Response time,Next State:  (7, 15.892373986997768) 1 24.0 1366.1899447 (15.954793861767499, 6)
Current State,action,reward,Response time,Next State:  (6, 15.954793861767499) 4 22.0 1452.63467243 (16.004586266677634, 8)
Current State,action,reward,Response time,Next State:  (8, 16.004586266677634) 1 23.0 1306.42824342 (16.017694914042416, 7)
Current State,action,reward,Response time,Next State:  (7, 16.017694914042416) 3 22.0 1373.9995352 (15.947547279389703, 8)
Current State,action,reward,Response time,Next State:  (8, 15.947547279389703) 2 22.0 1303.0946115 (16.11465619633363, 8)
Current State,action,reward,Response time,Next State:  (8, 16.11465619633363) 1 23.0 1312.86125789 (16.147078378791146, 7)
Current State,action,reward,Response time,Next State:  (7, 16.147078378791146) 0 25.0 1382.06228977 (16.229253414601111, 5)
Current State,action,reward,Response time,Next State:  (5, 16.229253414601111) 1 22.3732869054 1748.74464891 (16.295120821876548, 4)
Current State,action,reward,Response time,Next State:  (4, 16.295120821876548) 2 18.1379384408 2036.26713095 (16.667936385136993, 4)
Current State,action,reward,Response time,Next State:  (4, 16.667936385136993) 2 16.224304988 2078.62061559 (16.836383524612351, 4)
Current State,action,reward,Response time,Next State:  (4, 16.836383524612351) 1 -94.3511074387 2097.75695012 (16.845818065953559, 3)
Current State,action,reward,Response time,Next State:  (3, 16.845818065953559) 1 -268.436386844 3213.51107439 (17.052961248403161, 2)
Current State,action,reward,Response time,Next State:  (2, 17.052961248403161) 3 -101.631378022 4964.36386844 (17.215992726625572, 3)
############ Running episode number: 6  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 0 22.0 1029.06659875 (11.973514343585284, 8)
Current State,action,reward,Response time,Next State:  (8, 11.973514343585284) 0 24.0 1070.83307124 (11.786394321941378, 6)
Current State,action,reward,Response time,Next State:  (6, 11.786394321941378) 1 25.0 1176.94045342 (11.61852219546234, 5)
Current State,action,reward,Response time,Next State:  (5, 11.61852219546234) 3 24.0 1325.10042406 (11.469111876584304, 6)
Current State,action,reward,Response time,Next State:  (6, 11.469111876584304) 0 26.0 1155.95567613 (11.336751742492702, 4)
Current State,action,reward,Response time,Next State:  (4, 11.336751742492702) 3 25.0 1472.97463629 (11.25610796929319, 5)
Current State,action,reward,Response time,Next State:  (5, 11.25610796929319) 1 26.0 1291.80100003 (11.027107764209074, 4)
Current State,action,reward,Response time,Next State:  (4, 11.027107764209074) 4 24.0 1437.79772102 (10.995673623987257, 6)
Current State,action,reward,Response time,Next State:  (6, 10.995673623987257) 1 25.0 1124.64289336 (10.931193889570471, 5)
Current State,action,reward,Response time,Next State:  (5, 10.931193889570471) 4 23.0 1261.94717276 (10.816918347608043, 7)
Current State,action,reward,Response time,Next State:  (7, 10.816918347608043) 1 24.0 1049.90414092 (10.819208572963639, 6)
Current State,action,reward,Response time,Next State:  (6, 10.819208572963639) 3 23.0 1112.97165264 (10.768325938188134, 7)
Current State,action,reward,Response time,Next State:  (7, 10.768325938188134) 1 24.0 1046.87602081 (10.772009508959538, 6)
Current State,action,reward,Response time,Next State:  (6, 10.772009508959538) 2 24.0 1109.84994875 (10.644925616761762, 6)
Current State,action,reward,Response time,Next State:  (6, 10.644925616761762) 2 24.0 1101.44473373 (10.58735855349979, 6)
Current State,action,reward,Response time,Next State:  (6, 10.58735855349979) 2 24.0 1097.63729963 (10.552868829802469, 6)
Current State,action,reward,Response time,Next State:  (6, 10.552868829802469) 0 26.0 1095.35618014 (10.553846649940214, 4)
Current State,action,reward,Response time,Next State:  (4, 10.553846649940214) 0 -64.3122464116 1384.03318082 (10.489125480251131, 2)
Current State,action,reward,Response time,Next State:  (2, 10.489125480251131) 2 -63.0612324246 2923.12246412 (10.448897752470936, 2)
Action -2 not possible so Scaled down by 0
Current State,action,reward,Response time,Next State:  (2, 10.448897752470936) 0 -62.571500352 2910.61232425 (10.433149880183072, 2)
Action -2 not possible so Scaled down by 0
Current State,action,reward,Response time,Next State:  (2, 10.433149880183072) 0 -62.8421061381 2905.71500352 (10.44185150623065, 2)
Current State,action,reward,Response time,Next State:  (2, 10.44185150623065) 4 26.0 2908.42106138 (10.370942817486826, 4)
Current State,action,reward,Response time,Next State:  (4, 10.370942817486826) 4 24.0 1363.25450251 (10.42733414151318, 6)
Current State,action,reward,Response time,Next State:  (6, 10.42733414151318) 3 23.0 1087.05342808 (10.388469398680568, 7)
Current State,action,reward,Response time,Next State:  (7, 10.388469398680568) 3 22.0 1023.20460302 (10.344006106602812, 8)
Current State,action,reward,Response time,Next State:  (8, 10.344006106602812) 3 21.0 975.596796379 (10.319026962956018, 9)
Current State,action,reward,Response time,Next State:  (9, 10.319026962956018) 4 19.0 963.267677113 (10.30224719189987, 11)
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 2 19.0 912.494916918 (10.278181486298042, 11)
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 1 20.0 911.223233653 (10.268274366284802, 10)
Current State,action,reward,Response time,Next State:  (10, 10.268274366284802) 3 19.0 922.331700166 (10.335411397720526, 11)
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 4 19.0 914.247384359 (10.305649118067803, 11)
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 2 19.0 912.67468196 (10.24826025489064, 11)
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 4 19.0 909.642131904 (10.276491935146446, 11)
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 1 20.0 911.133954163 (10.236991269871366, 10)
Current State,action,reward,Response time,Next State:  (10, 10.236991269871366) 1 21.0 920.672316722 (10.236272697871373, 9)
Current State,action,reward,Response time,Next State:  (9, 10.236272697871373) 4 19.0 958.935899728 (10.369891240151098, 11)
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 1 20.0 916.069372847 (10.316955310454549, 10)
Current State,action,reward,Response time,Next State:  (10, 10.316955310454549) 3 19.0 924.913936648 (10.333617326102203, 11)
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 0 21.0 914.152581784 (10.390165524255663, 9)
Current State,action,reward,Response time,Next State:  (9, 10.390165524255663) 1 22.0 966.991429728 (10.425974763084863, 8)
Current State,action,reward,Response time,Next State:  (8, 10.425974763084863) 2 22.0 980.387437704 (10.546025383098053, 8)
Current State,action,reward,Response time,Next State:  (8, 10.546025383098053) 0 24.0 987.40377158 (10.655373370049301, 6)
Current State,action,reward,Response time,Next State:  (6, 10.655373370049301) 3 23.0 1102.13573879 (10.624473674922116, 7)
Current State,action,reward,Response time,Next State:  (7, 10.624473674922116) 3 22.0 1037.91161802 (10.771376986314287, 8)
Current State,action,reward,Response time,Next State:  (8, 10.771376986314287) 0 24.0 1000.57439983 (10.924797168745895, 6)
Current State,action,reward,Response time,Next State:  (6, 10.924797168745895) 4 22.0 1119.95518797 (11.039747673816453, 8)
Current State,action,reward,Response time,Next State:  (8, 11.039747673816453) 0 24.0 1016.25926965 (11.271571944085663, 6)
Current State,action,reward,Response time,Next State:  (6, 11.271571944085663) 4 22.0 1142.8905616 (11.670334358779868, 8)
Current State,action,reward,Response time,Next State:  (8, 11.670334358779868) 0 24.0 1053.11377918 (11.819721938468785, 6)
Current State,action,reward,Response time,Next State:  (6, 11.819721938468785) 2 24.0 1179.14471218 (12.19918626616789, 6)
Current State,action,reward,Response time,Next State:  (6, 12.19918626616789) 3 23.0 1204.24214357 (12.501496275411796, 7)
Current State,action,reward,Response time,Next State:  (7, 12.501496275411796) 3 22.0 1154.88153049 (13.168618569876575, 8)
Current State,action,reward,Response time,Next State:  (8, 13.168618569876575) 1 23.0 1140.68069275 (13.649658108197247, 7)
Current State,action,reward,Response time,Next State:  (7, 13.649658108197247) 1 24.0 1226.43122257 (14.283719188889453, 6)
Current State,action,reward,Response time,Next State:  (6, 14.283719188889453) 3 23.0 1342.11128751 (14.677479537099185, 7)
Current State,action,reward,Response time,Next State:  (7, 14.677479537099185) 3 22.0 1290.48169407 (15.353965082180355, 8)
Current State,action,reward,Response time,Next State:  (8, 15.353965082180355) 1 23.0 1268.40282167 (15.836943704090487, 7)
Current State,action,reward,Response time,Next State:  (7, 15.836943704090487) 1 24.0 1362.73571067 (16.466876895473597, 6)
Current State,action,reward,Response time,Next State:  (6, 16.466876895473597) 3 23.0 1486.50338648 (16.871606159345866, 7)
Current State,action,reward,Response time,Next State:  (7, 16.871606159345866) 3 22.0 1427.21249257 (17.534967586021782, 8)
Current State,action,reward,Response time,Next State:  (8, 17.534967586021782) 0 24.0 1395.8710659 (17.669285735563751, 6)
Current State,action,reward,Response time,Next State:  (6, 17.669285735563751) 2 24.0 1566.02963221 (17.944480812078613, 6)
Current State,action,reward,Response time,Next State:  (6, 17.944480812078613) 4 22.0 1584.23078855 (18.385807405229915, 8)
Current State,action,reward,Response time,Next State:  (8, 18.385807405229915) 3 21.0 1445.59822471 (18.671267839956315, 9)
Current State,action,reward,Response time,Next State:  (9, 18.671267839956315) 2 21.0 1400.46626871 (19.02839494033929, 9)
Current State,action,reward,Response time,Next State:  (9, 19.02839494033929) 3 20.0 1419.16011 (19.286321916040979, 10)
Current State,action,reward,Response time,Next State:  (10, 19.286321916040979) 2 20.0 1400.68584406 (19.340464848017284, 10)
Action +2 not possible so Scaled up by 1
Current State,action,reward,Response time,Next State:  (10, 19.340464848017284) 4 19.0 1403.55780672 (19.213467265587269, 11)
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 4 19.0 1379.54110953 (19.385636054792762, 11)
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 0 21.0 1392.48057747 (19.223969507401588, 9)
Current State,action,reward,Response time,Next State:  (9, 19.223969507401588) 2 21.0 1429.39747342 (19.25591252280865, 9)
Current State,action,reward,Response time,Next State:  (9, 19.25591252280865) 3 20.0 1431.06953264 (19.08360399753829, 10)
Current State,action,reward,Response time,Next State:  (10, 19.08360399753829) 1 21.0 1389.93285614 (18.668181536495972, 9)
Current State,action,reward,Response time,Next State:  (9, 18.668181536495972) 3 20.0 1400.30471596 (18.375894992990247, 10)
Current State,action,reward,Response time,Next State:  (10, 18.375894992990247) 3 19.0 1352.39307459 (17.82724819986867, 11)
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 2 19.0 1310.13203606 (17.229782241685768, 11)
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 2 19.0 1278.56065924 (16.84211602880065, 11)
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 0 21.0 1258.07554888 (16.237094554670044, 9)
Current State,action,reward,Response time,Next State:  (9, 16.237094554670044) 3 20.0 1273.04930988 (15.950694610794756, 10)
Current State,action,reward,Response time,Next State:  (10, 15.950694610794756) 0 22.0 1223.7505224 (15.828704162850809, 8)
Current State,action,reward,Response time,Next State:  (8, 15.828704162850809) 1 23.0 1296.14884991 (15.550833128512703, 7)
Current State,action,reward,Response time,Next State:  (7, 15.550833128512703) 0 25.0 1344.9062349 (15.446694946204717, 5)
Current State,action,reward,Response time,Next State:  (5, 15.446694946204717) 0 -72.8093838471 1676.84143877 (15.750501603468638, 3)
Current State,action,reward,Response time,Next State:  (3, 15.750501603468638) 4 25.0 2998.09383847 (15.817158911312735, 5)
Current State,action,reward,Response time,Next State:  (5, 15.817158911312735) 1 26.0 1710.88049068 (15.829956988360925, 4)
Current State,action,reward,Response time,Next State:  (4, 15.829956988360925) 2 26.0 1983.42247739 (15.892373986997768, 4)
Current State,action,reward,Response time,Next State:  (4, 15.892373986997768) 1 -76.827224819 1990.51332244 (15.954793861767499, 3)
Current State,action,reward,Response time,Next State:  (3, 15.954793861767499) 2 -77.8064981709 3038.27224819 (16.004586266677634, 3)
Current State,action,reward,Response time,Next State:  (3, 16.004586266677634) 1 -236.241362763 3048.06498171 (16.017694914042416, 2)
Current State,action,reward,Response time,Next State:  (2, 16.017694914042416) 3 -76.6847053921 4642.41362763 (15.947547279389703, 3)
Current State,action,reward,Response time,Next State:  (3, 15.947547279389703) 3 24.4234442756 3036.84705392 (16.11465619633363, 4)
Current State,action,reward,Response time,Next State:  (4, 16.11465619633363) 0 -240.264968694 2015.76555724 (16.147078378791146, 2)
Current State,action,reward,Response time,Next State:  (2, 16.147078378791146) 4 23.1215695679 4682.64968694 (16.229253414601111, 4)
Current State,action,reward,Response time,Next State:  (4, 16.229253414601111) 0 -244.86883715 2028.78430432 (16.295120821876548, 2)
Current State,action,reward,Response time,Next State:  (2, 16.295120821876548) 4 18.1379384408 4728.6883715 (16.667936385136993, 4)
Current State,action,reward,Response time,Next State:  (4, 16.667936385136993) 3 25.0 2078.62061559 (16.836383524612351, 5)
Current State,action,reward,Response time,Next State:  (5, 16.836383524612351) 4 23.0 1804.52911185 (16.845818065953559, 7)
Current State,action,reward,Response time,Next State:  (7, 16.845818065953559) 1 24.0 1425.60546289 (17.052961248403161, 6)
Current State,action,reward,Response time,Next State:  (6, 17.052961248403161) 2 24.0 1525.26648162 (17.215992726625572, 6)
############ Running episode number: 7  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 2 20.0 1029.06659875 (11.973514343585284, 10)
Current State,action,reward,Response time,Next State:  (10, 11.973514343585284) 0 22.0 1012.7846064 (11.786394321941378, 8)
Current State,action,reward,Response time,Next State:  (8, 11.786394321941378) 0 24.0 1059.89687994 (11.61852219546234, 6)
Current State,action,reward,Response time,Next State:  (6, 11.61852219546234) 1 25.0 1165.83754105 (11.469111876584304, 5)
Current State,action,reward,Response time,Next State:  (5, 11.469111876584304) 1 26.0 1311.37227212 (11.336751742492702, 4)
Current State,action,reward,Response time,Next State:  (4, 11.336751742492702) 0 -88.1640987176 1472.97463629 (11.25610796929319, 2)
Current State,action,reward,Response time,Next State:  (2, 11.25610796929319) 4 26.0 3161.64098718 (11.027107764209074, 4)
Current State,action,reward,Response time,Next State:  (4, 11.027107764209074) 2 26.0 1437.79772102 (10.995673623987257, 4)
Current State,action,reward,Response time,Next State:  (4, 10.995673623987257) 4 24.0 1434.22666469 (10.931193889570471, 6)
Current State,action,reward,Response time,Next State:  (6, 10.931193889570471) 3 23.0 1120.37826137 (10.816918347608043, 7)
Current State,action,reward,Response time,Next State:  (7, 10.816918347608043) 0 25.0 1049.90414092 (10.819208572963639, 5)
Current State,action,reward,Response time,Next State:  (5, 10.819208572963639) 0 25.1756777696 1251.65771315 (10.768325938188134, 3)
Current State,action,reward,Response time,Next State:  (3, 10.768325938188134) 3 26.0 2018.2432223 (10.772009508959538, 4)
Current State,action,reward,Response time,Next State:  (4, 10.772009508959538) 0 -69.1573659527 1408.81743934 (10.644925616761762, 2)
Current State,action,reward,Response time,Next State:  (2, 10.644925616761762) 2 -67.3671280917 2971.57365953 (10.58735855349979, 2)
Action -2 not possible so Scaled down by 0
Current State,action,reward,Response time,Next State:  (2, 10.58735855349979) 0 -66.2945562827 2953.67128092 (10.552868829802469, 2)
Action -2 not possible so Scaled down by 0
Current State,action,reward,Response time,Next State:  (2, 10.552868829802469) 1 -66.3249648277 2942.94556283 (10.553846649940214, 2)
Action -2 not possible so Scaled down by 0
Current State,action,reward,Response time,Next State:  (2, 10.553846649940214) 1 -64.3122464116 2943.24964828 (10.489125480251131, 2)
Current State,action,reward,Response time,Next State:  (2, 10.489125480251131) 2 -63.0612324246 2923.12246412 (10.448897752470936, 2)
Action -2 not possible so Scaled down by 0
Current State,action,reward,Response time,Next State:  (2, 10.448897752470936) 1 -62.571500352 2910.61232425 (10.433149880183072, 2)
Action -2 not possible so Scaled down by 0
Current State,action,reward,Response time,Next State:  (2, 10.433149880183072) 0 -62.8421061381 2905.71500352 (10.44185150623065, 2)
Action -2 not possible so Scaled down by 0
Current State,action,reward,Response time,Next State:  (2, 10.44185150623065) 0 -60.636966392 2908.42106138 (10.370942817486826, 2)
Current State,action,reward,Response time,Next State:  (2, 10.370942817486826) 3 27.0 2886.36966392 (10.42733414151318, 3)
Current State,action,reward,Response time,Next State:  (3, 10.42733414151318) 1 -61.1820132895 1951.17994623 (10.388469398680568, 2)
Action -2 not possible so Scaled down by 0
Current State,action,reward,Response time,Next State:  (2, 10.388469398680568) 1 -59.799280449 2891.82013289 (10.344006106602812, 2)
Current State,action,reward,Response time,Next State:  (2, 10.344006106602812) 3 27.0 2877.99280449 (10.319026962956018, 3)
Current State,action,reward,Response time,Next State:  (3, 10.319026962956018) 3 26.0 1929.87904021 (10.30224719189987, 4)
Current State,action,reward,Response time,Next State:  (4, 10.30224719189987) 2 26.0 1355.45037786 (10.278181486298042, 4)
Current State,action,reward,Response time,Next State:  (4, 10.278181486298042) 4 24.0 1352.71640806 (10.268274366284802, 6)
Current State,action,reward,Response time,Next State:  (6, 10.268274366284802) 1 25.0 1076.53335675 (10.335411397720526, 5)
Current State,action,reward,Response time,Next State:  (5, 10.335411397720526) 2 25.0 1207.20535417 (10.305649118067803, 5)
Current State,action,reward,Response time,Next State:  (5, 10.305649118067803) 0 27.0 1204.47072981 (10.24826025489064, 3)
Current State,action,reward,Response time,Next State:  (3, 10.24826025489064) 3 26.0 1915.96126469 (10.276491935146446, 4)
Current State,action,reward,Response time,Next State:  (4, 10.276491935146446) 3 25.0 1352.52446763 (10.236991269871366, 5)
Current State,action,reward,Response time,Next State:  (5, 10.236991269871366) 3 24.0 1198.16229423 (10.236272697871373, 6)
Current State,action,reward,Response time,Next State:  (6, 10.236272697871373) 1 25.0 1074.41679501 (10.369891240151098, 5)
Current State,action,reward,Response time,Next State:  (5, 10.369891240151098) 0 27.0 1210.37343867 (10.316955310454549, 3)
Action -2 not possible so Scaled down by 1
Current State,action,reward,Response time,Next State:  (3, 10.316955310454549) 0 -59.4762070259 1929.47160576 (10.333617326102203, 2)
Current State,action,reward,Response time,Next State:  (2, 10.333617326102203) 3 27.0 2874.76207026 (10.390165524255663, 3)
Current State,action,reward,Response time,Next State:  (3, 10.390165524255663) 1 -62.3483664008 1943.86994856 (10.425974763084863, 2)
Current State,action,reward,Response time,Next State:  (2, 10.425974763084863) 3 27.0 2903.48366401 (10.546025383098053, 3)
Current State,action,reward,Response time,Next State:  (3, 10.546025383098053) 2 27.0 1974.52309871 (10.655373370049301, 3)
Current State,action,reward,Response time,Next State:  (3, 10.655373370049301) 1 -68.5213453093 1996.0287018 (10.624473674922116, 2)
Current State,action,reward,Response time,Next State:  (2, 10.624473674922116) 3 25.1156724308 2965.21345309 (10.771376986314287, 3)
Current State,action,reward,Response time,Next State:  (3, 10.771376986314287) 3 26.0 2018.84327569 (10.924797168745895, 4)
Current State,action,reward,Response time,Next State:  (4, 10.924797168745895) 3 25.0 1426.17478823 (11.039747673816453, 5)
Current State,action,reward,Response time,Next State:  (5, 11.039747673816453) 3 24.0 1271.92133555 (11.271571944085663, 6)
Current State,action,reward,Response time,Next State:  (6, 11.271571944085663) 3 23.0 1142.8905616 (11.670334358779868, 7)
Current State,action,reward,Response time,Next State:  (7, 11.670334358779868) 4 21.0 1103.08623692 (11.819721938468785, 9)
Current State,action,reward,Response time,Next State:  (9, 11.819721938468785) 2 21.0 1041.82165315 (12.19918626616789, 9)
Current State,action,reward,Response time,Next State:  (9, 12.19918626616789) 4 19.0 1061.68473805 (12.501496275411796, 11)
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 4 19.0 1028.70793389 (13.168618569876575, 11)
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 0 21.0 1089.37925646 (14.283719188889453, 9)
Current State,action,reward,Response time,Next State:  (9, 14.283719188889453) 1 22.0 1170.79974938 (14.677479537099185, 8)
Current State,action,reward,Response time,Next State:  (8, 14.677479537099185) 4 20.0 1228.86576266 (15.353965082180355, 10)
Current State,action,reward,Response time,Next State:  (10, 15.353965082180355) 2 20.0 1192.09754638 (15.836943704090487, 10)
Current State,action,reward,Response time,Next State:  (10, 15.836943704090487) 0 22.0 1217.71670884 (16.466876895473597, 8)
Current State,action,reward,Response time,Next State:  (8, 16.466876895473597) 3 21.0 1333.44672445 (16.871606159345866, 9)
Current State,action,reward,Response time,Next State:  (9, 16.871606159345866) 3 20.0 1306.26286107 (17.534967586021782, 10)
Current State,action,reward,Response time,Next State:  (10, 17.534967586021782) 1 21.0 1307.78684385 (17.669285735563751, 9)
Current State,action,reward,Response time,Next State:  (9, 17.669285735563751) 3 20.0 1348.01745033 (17.944480812078613, 10)
Action +2 not possible so Scaled up by 1
Current State,action,reward,Response time,Next State:  (10, 17.944480812078613) 4 19.0 1329.50910109 (18.385807405229915, 11)
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 1 20.0 1339.64749699 (18.671267839956315, 10)
Current State,action,reward,Response time,Next State:  (10, 18.671267839956315) 2 20.0 1368.06085906 (19.02839494033929, 10)
Current State,action,reward,Response time,Next State:  (10, 19.02839494033929) 1 21.0 1387.00434183 (19.286321916040979, 9)
Current State,action,reward,Response time,Next State:  (9, 19.286321916040979) 1 22.0 1432.66131431 (19.340464848017284, 8)
Current State,action,reward,Response time,Next State:  (8, 19.340464848017284) 0 24.0 1501.39298325 (19.213467265587269, 6)
Current State,action,reward,Response time,Next State:  (6, 19.213467265587269) 2 24.0 1668.16041811 (19.140765783401285, 6)
Current State,action,reward,Response time,Next State:  (6, 19.140765783401285) 3 23.0 1663.35200707 (19.385636054792762, 7)
Current State,action,reward,Response time,Next State:  (7, 19.385636054792762) 2 23.0 1583.87861729 (19.223969507401588, 7)
Current State,action,reward,Response time,Next State:  (7, 19.223969507401588) 4 21.0 1573.80408654 (19.25591252280865, 9)
Current State,action,reward,Response time,Next State:  (9, 19.25591252280865) 3 20.0 1431.06953264 (19.08360399753829, 10)
Current State,action,reward,Response time,Next State:  (10, 19.08360399753829) 0 22.0 1389.93285614 (18.668181536495972, 8)
Current State,action,reward,Response time,Next State:  (8, 18.668181536495972) 3 21.0 1462.10152292 (18.375894992990247, 9)
Current State,action,reward,Response time,Next State:  (9, 18.375894992990247) 0 23.0 1385.00495784 (17.82724819986867, 7)
Current State,action,reward,Response time,Next State:  (7, 17.82724819986867) 2 23.0 1486.76498054 (17.229782241685768, 7)
Current State,action,reward,Response time,Next State:  (7, 17.229782241685768) 4 21.0 1449.53285514 (16.84211602880065, 9)
Current State,action,reward,Response time,Next State:  (9, 16.84211602880065) 1 22.0 1304.71919827 (16.237094554670044, 8)
Current State,action,reward,Response time,Next State:  (8, 16.237094554670044) 2 22.0 1320.01714264 (15.950694610794756, 8)
Current State,action,reward,Response time,Next State:  (8, 15.950694610794756) 4 20.0 1303.27855664 (15.828704162850809, 10)
Action +2 not possible so Scaled up by 1
Current State,action,reward,Response time,Next State:  (10, 15.828704162850809) 4 19.0 1217.27964986 (15.550833128512703, 11)
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 4 19.0 1189.84140354 (15.446694946204717, 11)
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 2 19.0 1184.33851965 (15.750501603468638, 11)
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 4 19.0 1200.39231205 (15.817158911312735, 11)
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 0 21.0 1203.91462651 (15.829956988360925, 9)
Current State,action,reward,Response time,Next State:  (9, 15.829956988360925) 0 23.0 1251.7376675 (15.892373986997768, 7)
Current State,action,reward,Response time,Next State:  (7, 15.892373986997768) 4 21.0 1366.1899447 (15.954793861767499, 9)
Current State,action,reward,Response time,Next State:  (9, 15.954793861767499) 0 23.0 1258.27226176 (16.004586266677634, 7)
Current State,action,reward,Response time,Next State:  (7, 16.004586266677634) 1 24.0 1373.18264715 (16.017694914042416, 6)
Current State,action,reward,Response time,Next State:  (6, 16.017694914042416) 3 23.0 1456.7948918 (15.947547279389703, 7)
Current State,action,reward,Response time,Next State:  (7, 15.947547279389703) 3 22.0 1369.62816392 (16.11465619633363, 8)
Current State,action,reward,Response time,Next State:  (8, 16.11465619633363) 0 24.0 1312.86125789 (16.147078378791146, 6)
Current State,action,reward,Response time,Next State:  (6, 16.147078378791146) 0 23.1215695679 1465.35219849 (16.229253414601111, 4)
Current State,action,reward,Response time,Next State:  (4, 16.229253414601111) 1 -83.5204770037 2028.78430432 (16.295120821876548, 3)
Current State,action,reward,Response time,Next State:  (3, 16.295120821876548) 3 18.1379384408 3105.20477004 (16.667936385136993, 4)
Current State,action,reward,Response time,Next State:  (4, 16.667936385136993) 3 25.0 2078.62061559 (16.836383524612351, 5)
Current State,action,reward,Response time,Next State:  (5, 16.836383524612351) 3 24.0 1804.52911185 (16.845818065953559, 6)
Current State,action,reward,Response time,Next State:  (6, 16.845818065953559) 0 13.7638869682 1511.56621672 (17.052961248403161, 4)
Current State,action,reward,Response time,Next State:  (4, 17.052961248403161) 4 24.0 2122.36113032 (17.215992726625572, 6)
############ Running episode number: 8  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 1 21.0 1029.06659875 (11.973514343585284, 9)
Current State,action,reward,Response time,Next State:  (9, 11.973514343585284) 3 20.0 1049.87192659 (11.786394321941378, 10)
Current State,action,reward,Response time,Next State:  (10, 11.786394321941378) 1 21.0 1002.85899476 (11.61852219546234, 9)
Current State,action,reward,Response time,Next State:  (9, 11.61852219546234) 3 20.0 1031.28983953 (11.469111876584304, 10)
Current State,action,reward,Response time,Next State:  (10, 11.469111876584304) 3 19.0 986.02903554 (11.336751742492702, 11)
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 1 20.0 967.160346038 (11.25610796929319, 10)
Action +2 not possible so Scaled up by 1
Current State,action,reward,Response time,Next State:  (10, 11.25610796929319) 4 19.0 974.730436685 (11.027107764209074, 11)
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 2 19.0 950.798097136 (10.995673623987257, 11)
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 1 20.0 949.137050055 (10.931193889570471, 10)
Current State,action,reward,Response time,Next State:  (10, 10.931193889570471) 3 19.0 957.495664348 (10.816918347608043, 11)
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 2 19.0 939.691239608 (10.819208572963639, 11)
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 0 21.0 937.12351295 (10.772009508959538, 9)
Current State,action,reward,Response time,Next State:  (9, 10.772009508959538) 1 22.0 986.979077927 (10.644925616761762, 8)
Current State,action,reward,Response time,Next State:  (8, 10.644925616761762) 3 21.0 993.183975462 (10.58735855349979, 9)
Current State,action,reward,Response time,Next State:  (9, 10.58735855349979) 1 22.0 977.313511661 (10.552868829802469, 8)
Current State,action,reward,Response time,Next State:  (8, 10.552868829802469) 0 24.0 987.80373542 (10.553846649940214, 6)
Current State,action,reward,Response time,Next State:  (6, 10.553846649940214) 0 26.0 1095.42085229 (10.489125480251131, 4)
Current State,action,reward,Response time,Next State:  (4, 10.489125480251131) 4 24.0 1376.68057182 (10.448897752470936, 6)
Current State,action,reward,Response time,Next State:  (6, 10.448897752470936) 4 22.0 1088.47962603 (10.433149880183072, 8)
Current State,action,reward,Response time,Next State:  (8, 10.433149880183072) 1 23.0 980.806785952 (10.44185150623065, 7)
Current State,action,reward,Response time,Next State:  (7, 10.44185150623065) 0 25.0 1026.53120146 (10.370942817486826, 5)
Current State,action,reward,Response time,Next State:  (5, 10.370942817486826) 1 26.0 1210.47005993 (10.42733414151318, 4)
Current State,action,reward,Response time,Next State:  (4, 10.42733414151318) 4 24.0 1369.66080447 (10.388469398680568, 6)
Current State,action,reward,Response time,Next State:  (6, 10.388469398680568) 3 23.0 1084.48294874 (10.344006106602812, 7)
Current State,action,reward,Response time,Next State:  (7, 10.344006106602812) 3 22.0 1020.43379601 (10.319026962956018, 8)
Current State,action,reward,Response time,Next State:  (8, 10.319026962956018) 3 21.0 974.136895449 (10.30224719189987, 9)
Current State,action,reward,Response time,Next State:  (9, 10.30224719189987) 0 23.0 962.389338906 (10.278181486298042, 7)
Current State,action,reward,Response time,Next State:  (7, 10.278181486298042) 2 23.0 1016.33182085 (10.268274366284802, 7)
Current State,action,reward,Response time,Next State:  (7, 10.268274366284802) 4 21.0 1015.71444152 (10.335411397720526, 9)
Current State,action,reward,Response time,Next State:  (9, 10.335411397720526) 4 19.0 964.125321415 (10.305649118067803, 11)
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 2 19.0 912.67468196 (10.24826025489064, 11)
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 4 19.0 909.642131904 (10.276491935146446, 11)
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 2 19.0 911.133954163 (10.236991269871366, 11)
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 0 21.0 909.046654676 (10.236272697871373, 9)
Current State,action,reward,Response time,Next State:  (9, 10.236272697871373) 1 22.0 958.935899728 (10.369891240151098, 8)
Current State,action,reward,Response time,Next State:  (8, 10.369891240151098) 0 24.0 977.109647703 (10.316955310454549, 6)
Current State,action,reward,Response time,Next State:  (6, 10.316955310454549) 2 24.0 1079.75307088 (10.333617326102203, 6)
Current State,action,reward,Response time,Next State:  (6, 10.333617326102203) 4 22.0 1080.85508169 (10.390165524255663, 8)
Current State,action,reward,Response time,Next State:  (8, 10.390165524255663) 3 21.0 978.294574081 (10.425974763084863, 9)
Current State,action,reward,Response time,Next State:  (9, 10.425974763084863) 0 23.0 968.865866662 (10.546025383098053, 7)
Current State,action,reward,Response time,Next State:  (7, 10.546025383098053) 1 24.0 1033.02297692 (10.655373370049301, 6)
Current State,action,reward,Response time,Next State:  (6, 10.655373370049301) 3 23.0 1102.13573879 (10.624473674922116, 7)
Current State,action,reward,Response time,Next State:  (7, 10.624473674922116) 4 21.0 1037.91161802 (10.771376986314287, 9)
Current State,action,reward,Response time,Next State:  (9, 10.771376986314287) 3 20.0 986.945968488 (10.924797168745895, 10)
Current State,action,reward,Response time,Next State:  (10, 10.924797168745895) 0 22.0 957.1563561 (11.039747673816453, 8)
Current State,action,reward,Response time,Next State:  (8, 11.039747673816453) 4 20.0 1016.25926965 (11.271571944085663, 10)
Current State,action,reward,Response time,Next State:  (10, 11.271571944085663) 3 19.0 975.550709187 (11.670334358779868, 11)
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 4 19.0 992.681522335 (12.19918626616789, 11)
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 1 20.0 1012.73322757 (12.501496275411796, 10)
Current State,action,reward,Response time,Next State:  (10, 12.501496275411796) 3 19.0 1040.79092857 (13.168618569876575, 11)
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 2 19.0 1063.96010023 (13.649658108197247, 11)
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 2 19.0 1089.37925646 (14.283719188889453, 11)
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 2 19.0 1143.69153516 (15.353965082180355, 11)
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 0 21.0 1179.43847566 (15.836943704090487, 9)
Current State,action,reward,Response time,Next State:  (9, 15.836943704090487) 2 21.0 1252.10338759 (16.466876895473597, 9)
Current State,action,reward,Response time,Next State:  (9, 16.466876895473597) 1 22.0 1285.07728144 (16.871606159345866, 8)
Current State,action,reward,Response time,Next State:  (8, 16.871606159345866) 0 24.0 1357.1010433 (17.534967586021782, 6)
Current State,action,reward,Response time,Next State:  (6, 17.534967586021782) 4 22.0 1557.14594988 (17.669285735563751, 8)
Current State,action,reward,Response time,Next State:  (8, 17.669285735563751) 1 23.0 1403.72126261 (17.944480812078613, 7)
Current State,action,reward,Response time,Next State:  (7, 17.944480812078613) 4 21.0 1494.0705337 (18.385807405229915, 9)
Current State,action,reward,Response time,Next State:  (9, 18.385807405229915) 1 22.0 1385.5238237 (18.671267839956315, 8)
Current State,action,reward,Response time,Next State:  (8, 18.671267839956315) 4 20.0 1462.2819013 (19.02839494033929, 10)
Current State,action,reward,Response time,Next State:  (10, 19.02839494033929) 2 20.0 1387.00434183 (19.286321916040979, 10)
Current State,action,reward,Response time,Next State:  (10, 19.286321916040979) 3 19.0 1400.68584406 (19.340464848017284, 11)
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 4 19.0 1390.09363446 (19.213467265587269, 11)
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 2 19.0 1383.38281107 (19.140765783401285, 11)
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 1 20.0 1392.48057747 (19.223969507401588, 10)
Action +2 not possible so Scaled up by 1
Current State,action,reward,Response time,Next State:  (10, 19.223969507401588) 4 19.0 1397.37841716 (19.25591252280865, 11)
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 2 19.0 1385.62570908 (19.08360399753829, 11)
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 0 21.0 1376.52055872 (18.668181536495972, 9)
Current State,action,reward,Response time,Next State:  (9, 18.668181536495972) 3 20.0 1400.30471596 (18.375894992990247, 10)
Current State,action,reward,Response time,Next State:  (10, 18.375894992990247) 2 20.0 1352.39307459 (17.82724819986867, 10)
Current State,action,reward,Response time,Next State:  (10, 17.82724819986867) 0 22.0 1323.29060362 (17.229782241685768, 8)
Current State,action,reward,Response time,Next State:  (8, 17.229782241685768) 0 24.0 1378.03457101 (16.84211602880065, 6)
Current State,action,reward,Response time,Next State:  (6, 16.84211602880065) 1 25.0 1511.32136729 (16.237094554670044, 5)
Current State,action,reward,Response time,Next State:  (5, 16.237094554670044) 0 -76.7466043459 1749.46511027 (15.950694610794756, 3)
Current State,action,reward,Response time,Next State:  (3, 15.950694610794756) 1 -230.364071496 3037.46604346 (15.828704162850809, 2)
Current State,action,reward,Response time,Next State:  (2, 15.828704162850809) 4 26.0 4583.64071496 (15.550833128512703, 4)
Current State,action,reward,Response time,Next State:  (4, 15.550833128512703) 4 24.0 1951.71278156 (15.446694946204717, 6)
Current State,action,reward,Response time,Next State:  (6, 15.446694946204717) 1 25.0 1419.0294644 (15.750501603468638, 5)
Current State,action,reward,Response time,Next State:  (5, 15.750501603468638) 3 24.0 1704.75586919 (15.817158911312735, 6)
Current State,action,reward,Response time,Next State:  (6, 15.817158911312735) 4 22.0 1443.53161985 (15.829956988360925, 8)
Current State,action,reward,Response time,Next State:  (8, 15.829956988360925) 3 21.0 1296.22207104 (15.892373986997768, 9)
Current State,action,reward,Response time,Next State:  (9, 15.892373986997768) 0 23.0 1255.00488935 (15.954793861767499, 7)
Current State,action,reward,Response time,Next State:  (7, 15.954793861767499) 4 21.0 1370.07974724 (16.004586266677634, 9)
Current State,action,reward,Response time,Next State:  (9, 16.004586266677634) 1 22.0 1260.87864843 (16.017694914042416, 8)
Current State,action,reward,Response time,Next State:  (8, 16.017694914042416) 0 24.0 1307.19437562 (15.947547279389703, 6)
Current State,action,reward,Response time,Next State:  (6, 15.947547279389703) 3 23.0 1452.15538995 (16.11465619633363, 7)
Current State,action,reward,Response time,Next State:  (7, 16.11465619633363) 2 23.0 1380.04184534 (16.147078378791146, 7)
Current State,action,reward,Response time,Next State:  (7, 16.147078378791146) 4 21.0 1382.06228977 (16.229253414601111, 9)
Current State,action,reward,Response time,Next State:  (9, 16.229253414601111) 0 23.0 1272.63886489 (16.295120821876548, 7)
Current State,action,reward,Response time,Next State:  (7, 16.295120821876548) 0 25.0 1391.28781087 (16.667936385136993, 5)
Current State,action,reward,Response time,Next State:  (5, 16.667936385136993) 4 23.0 1789.05181455 (16.836383524612351, 7)
Current State,action,reward,Response time,Next State:  (7, 16.836383524612351) 4 21.0 1425.01753312 (16.845818065953559, 9)
Current State,action,reward,Response time,Next State:  (9, 16.845818065953559) 2 21.0 1304.91298164 (17.052961248403161, 9)
Current State,action,reward,Response time,Next State:  (9, 17.052961248403161) 2 21.0 1315.75590499 (17.215992726625572, 9)
############ Running episode number: 9  ##############
Action +2 not possible so Scaled up by 1
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 4 19.0 1029.06659875 (11.973514343585284, 11)
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 4 19.0 1000.80824137 (11.786394321941378, 11)
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 0 21.0 990.920419923 (11.61852219546234, 9)
Current State,action,reward,Response time,Next State:  (9, 11.61852219546234) 4 19.0 1031.28983953 (11.469111876584304, 11)
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 0 21.0 962.898956888 (11.027107764209074, 9)
Current State,action,reward,Response time,Next State:  (9, 11.027107764209074) 4 19.0 1000.33221268 (10.995673623987257, 11)
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 2 19.0 949.137050055 (10.931193889570471, 11)
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 4 19.0 939.691239608 (10.819208572963639, 11)
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 4 19.0 937.12351295 (10.772009508959538, 11)
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 0 21.0 937.318160694 (10.644925616761762, 9)
Current State,action,reward,Response time,Next State:  (9, 10.644925616761762) 1 22.0 980.32686333 (10.58735855349979, 8)
Current State,action,reward,Response time,Next State:  (8, 10.58735855349979) 3 21.0 989.819480251 (10.552868829802469, 9)
Current State,action,reward,Response time,Next State:  (9, 10.552868829802469) 1 22.0 975.508144832 (10.553846649940214, 8)
Current State,action,reward,Response time,Next State:  (8, 10.553846649940214) 2 22.0 987.860883917 (10.489125480251131, 8)
Current State,action,reward,Response time,Next State:  (8, 10.489125480251131) 1 23.0 984.078268423 (10.448897752470936, 7)
Current State,action,reward,Response time,Next State:  (7, 10.448897752470936) 4 21.0 1026.97030049 (10.433149880183072, 9)
Current State,action,reward,Response time,Next State:  (9, 10.433149880183072) 3 20.0 969.241448633 (10.44185150623065, 10)
Current State,action,reward,Response time,Next State:  (10, 10.44185150623065) 0 22.0 931.538941947 (10.370942817486826, 8)
Current State,action,reward,Response time,Next State:  (8, 10.370942817486826) 4 20.0 977.171106925 (10.42733414151318, 10)
Current State,action,reward,Response time,Next State:  (10, 10.42733414151318) 0 22.0 930.768881517 (10.388469398680568, 8)
Current State,action,reward,Response time,Next State:  (8, 10.388469398680568) 2 22.0 978.19544437 (10.344006106602812, 8)
Current State,action,reward,Response time,Next State:  (8, 10.344006106602812) 2 22.0 975.596796379 (10.319026962956018, 8)
Current State,action,reward,Response time,Next State:  (8, 10.319026962956018) 4 20.0 974.136895449 (10.30224719189987, 10)
Current State,action,reward,Response time,Next State:  (10, 10.30224719189987) 0 22.0 924.133757854 (10.278181486298042, 8)
Current State,action,reward,Response time,Next State:  (8, 10.278181486298042) 1 23.0 971.749689939 (10.268274366284802, 7)
Current State,action,reward,Response time,Next State:  (7, 10.268274366284802) 1 24.0 1015.71444152 (10.335411397720526, 6)
Current State,action,reward,Response time,Next State:  (6, 10.335411397720526) 0 26.0 1080.97373999 (10.305649118067803, 4)
Current State,action,reward,Response time,Next State:  (4, 10.305649118067803) 2 26.0 1355.83685161 (10.24826025489064, 4)
Current State,action,reward,Response time,Next State:  (4, 10.24826025489064) 3 25.0 1349.31722482 (10.276491935146446, 5)
Current State,action,reward,Response time,Next State:  (5, 10.276491935146446) 1 26.0 1201.79170309 (10.236991269871366, 4)
Current State,action,reward,Response time,Next State:  (4, 10.236991269871366) 3 25.0 1348.03701865 (10.236272697871373, 5)
Current State,action,reward,Response time,Next State:  (5, 10.236272697871373) 3 24.0 1198.09627024 (10.369891240151098, 6)
Current State,action,reward,Response time,Next State:  (6, 10.369891240151098) 3 23.0 1083.25420594 (10.316955310454549, 7)
Current State,action,reward,Response time,Next State:  (7, 10.316955310454549) 4 21.0 1018.74807882 (10.333617326102203, 9)
Current State,action,reward,Response time,Next State:  (9, 10.333617326102203) 4 19.0 964.03141062 (10.390165524255663, 11)
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 4 19.0 917.140709305 (10.425974763084863, 11)
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 4 19.0 919.032945938 (10.546025383098053, 11)
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 0 21.0 925.376677007 (10.655373370049301, 9)
Current State,action,reward,Response time,Next State:  (9, 10.655373370049301) 2 21.0 980.873751654 (10.624473674922116, 9)
Current State,action,reward,Response time,Next State:  (9, 10.624473674922116) 1 22.0 979.256305105 (10.771376986314287, 8)
Current State,action,reward,Response time,Next State:  (8, 10.771376986314287) 2 22.0 1000.57439983 (10.924797168745895, 8)
Current State,action,reward,Response time,Next State:  (8, 10.924797168745895) 3 21.0 1009.54101094 (11.039747673816453, 9)
Current State,action,reward,Response time,Next State:  (9, 11.039747673816453) 4 19.0 1000.99384957 (11.271571944085663, 11)
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 2 19.0 963.716106332 (11.670334358779868, 11)
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 2 19.0 984.787563682 (11.819721938468785, 11)
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 0 21.0 992.681522335 (12.19918626616789, 9)
Current State,action,reward,Response time,Next State:  (9, 12.19918626616789) 0 23.0 1061.68473805 (12.501496275411796, 7)
Current State,action,reward,Response time,Next State:  (7, 12.501496275411796) 1 24.0 1154.88153049 (13.168618569876575, 6)
Current State,action,reward,Response time,Next State:  (6, 13.168618569876575) 0 26.0 1268.35952947 (13.649658108197247, 4)
Current State,action,reward,Response time,Next State:  (4, 13.649658108197247) 4 24.0 1735.73095309 (14.283719188889453, 6)
Current State,action,reward,Response time,Next State:  (6, 14.283719188889453) 4 22.0 1342.11128751 (14.677479537099185, 8)
Current State,action,reward,Response time,Next State:  (8, 14.677479537099185) 2 22.0 1228.86576266 (15.353965082180355, 8)
Current State,action,reward,Response time,Next State:  (8, 15.353965082180355) 3 21.0 1268.40282167 (15.836943704090487, 9)
Current State,action,reward,Response time,Next State:  (9, 15.836943704090487) 0 23.0 1252.10338759 (16.466876895473597, 7)
Current State,action,reward,Response time,Next State:  (7, 16.466876895473597) 0 25.0 1401.99108791 (16.871606159345866, 5)
Current State,action,reward,Response time,Next State:  (5, 16.871606159345866) 4 23.0 1807.76544576 (17.534967586021782, 7)
Current State,action,reward,Response time,Next State:  (7, 17.534967586021782) 4 21.0 1468.55100815 (17.669285735563751, 9)
Current State,action,reward,Response time,Next State:  (9, 17.669285735563751) 2 21.0 1348.01745033 (17.944480812078613, 9)
Current State,action,reward,Response time,Next State:  (9, 17.944480812078613) 1 22.0 1362.4225545 (18.385807405229915, 8)
Current State,action,reward,Response time,Next State:  (8, 18.385807405229915) 0 24.0 1445.59822471 (18.671267839956315, 6)
Current State,action,reward,Response time,Next State:  (6, 18.671267839956315) 4 22.0 1632.29983282 (19.02839494033929, 8)
Current State,action,reward,Response time,Next State:  (8, 19.02839494033929) 3 21.0 1483.15412147 (19.286321916040979, 9)
Current State,action,reward,Response time,Next State:  (9, 19.286321916040979) 2 21.0 1432.66131431 (19.340464848017284, 9)
Current State,action,reward,Response time,Next State:  (9, 19.340464848017284) 2 21.0 1435.4954296 (19.213467265587269, 9)
Current State,action,reward,Response time,Next State:  (9, 19.213467265587269) 4 19.0 1428.84773289 (19.140765783401285, 11)
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 0 21.0 1392.48057747 (19.223969507401588, 9)
Current State,action,reward,Response time,Next State:  (9, 19.223969507401588) 3 20.0 1429.39747342 (19.25591252280865, 10)
Current State,action,reward,Response time,Next State:  (10, 19.25591252280865) 1 21.0 1399.0728054 (19.08360399753829, 9)
Current State,action,reward,Response time,Next State:  (9, 19.08360399753829) 0 23.0 1422.05003169 (18.668181536495972, 7)
Current State,action,reward,Response time,Next State:  (7, 18.668181536495972) 1 24.0 1539.16919707 (18.375894992990247, 6)
Current State,action,reward,Response time,Next State:  (6, 18.375894992990247) 1 25.0 1612.76413679 (17.82724819986867, 5)
Current State,action,reward,Response time,Next State:  (5, 17.82724819986867) 0 -101.90257811 1895.57195892 (17.229782241685768, 3)
Current State,action,reward,Response time,Next State:  (3, 17.229782241685768) 2 -94.2782990187 3289.0257811 (16.84211602880065, 3)
Current State,action,reward,Response time,Next State:  (3, 16.84211602880065) 2 -82.3792672693 3212.78299019 (16.237094554670044, 3)
Current State,action,reward,Response time,Next State:  (3, 16.237094554670044) 3 26.0 3093.79267269 (15.950694610794756, 4)
Current State,action,reward,Response time,Next State:  (4, 15.950694610794756) 1 -74.3474032014 1997.13880134 (15.828704162850809, 3)
Current State,action,reward,Response time,Next State:  (3, 15.828704162850809) 1 -221.722754431 3013.47403201 (15.550833128512703, 2)
Current State,action,reward,Response time,Next State:  (2, 15.550833128512703) 4 26.0 4497.22754431 (15.446694946204717, 4)
Current State,action,reward,Response time,Next State:  (4, 15.446694946204717) 2 26.0 1939.88222688 (15.750501603468638, 4)
Current State,action,reward,Response time,Next State:  (4, 15.750501603468638) 4 24.0 1974.39599686 (15.817158911312735, 6)
Current State,action,reward,Response time,Next State:  (6, 15.817158911312735) 2 24.0 1443.53161985 (15.829956988360925, 6)
Current State,action,reward,Response time,Next State:  (6, 15.829956988360925) 0 26.0 1444.37807323 (15.892373986997768, 4)
Current State,action,reward,Response time,Next State:  (4, 15.892373986997768) 1 -76.827224819 1990.51332244 (15.954793861767499, 3)
Current State,action,reward,Response time,Next State:  (3, 15.954793861767499) 2 -77.8064981709 3038.27224819 (16.004586266677634, 3)
Current State,action,reward,Response time,Next State:  (3, 16.004586266677634) 3 25.5249670084 3048.06498171 (16.017694914042416, 4)
Current State,action,reward,Response time,Next State:  (4, 16.017694914042416) 3 25.0 2004.75032992 (15.947547279389703, 5)
Current State,action,reward,Response time,Next State:  (5, 15.947547279389703) 3 24.0 1722.86086353 (16.11465619633363, 6)
Current State,action,reward,Response time,Next State:  (6, 16.11465619633363) 0 24.0551140495 1463.20782432 (16.147078378791146, 4)
Current State,action,reward,Response time,Next State:  (4, 16.147078378791146) 4 24.0 2019.4488595 (16.229253414601111, 6)
Current State,action,reward,Response time,Next State:  (6, 16.229253414601111) 2 24.0 1470.78718189 (16.295120821876548, 6)
Current State,action,reward,Response time,Next State:  (6, 16.295120821876548) 4 22.0 1475.14359332 (16.667936385136993, 8)
Current State,action,reward,Response time,Next State:  (8, 16.667936385136993) 2 22.0 1345.1976051 (16.836383524612351, 8)
Current State,action,reward,Response time,Next State:  (8, 16.836383524612351) 4 20.0 1355.04246364 (16.845818065953559, 10)
Current State,action,reward,Response time,Next State:  (10, 16.845818065953559) 2 20.0 1271.23153331 (17.052961248403161, 10)
Current State,action,reward,Response time,Next State:  (10, 17.052961248403161) 0 22.0 1282.21925533 (17.215992726625572, 8)
############ Running episode number: 10  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 0 22.0 1029.06659875 (11.973514343585284, 8)
Current State,action,reward,Response time,Next State:  (8, 11.973514343585284) 2 22.0 1070.83307124 (11.786394321941378, 8)
Current State,action,reward,Response time,Next State:  (8, 11.786394321941378) 4 20.0 1059.89687994 (11.61852219546234, 10)
Current State,action,reward,Response time,Next State:  (10, 11.61852219546234) 2 20.0 993.95437024 (11.469111876584304, 10)
Action +2 not possible so Scaled up by 1
Current State,action,reward,Response time,Next State:  (10, 11.469111876584304) 4 19.0 986.02903554 (11.336751742492702, 11)
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 0 21.0 967.160346038 (11.25610796929319, 9)
Current State,action,reward,Response time,Next State:  (9, 11.25610796929319) 4 19.0 1012.3192433 (11.027107764209074, 11)
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 4 19.0 950.798097136 (10.995673623987257, 11)
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 1 20.0 949.137050055 (10.931193889570471, 10)
Current State,action,reward,Response time,Next State:  (10, 10.931193889570471) 3 19.0 957.495664348 (10.816918347608043, 11)
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 4 19.0 937.318160694 (10.644925616761762, 11)
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 1 20.0 930.602776506 (10.58735855349979, 10)
Current State,action,reward,Response time,Next State:  (10, 10.58735855349979) 1 21.0 939.257231149 (10.552868829802469, 9)
Current State,action,reward,Response time,Next State:  (9, 10.552868829802469) 1 22.0 975.508144832 (10.553846649940214, 8)
Current State,action,reward,Response time,Next State:  (8, 10.553846649940214) 1 23.0 987.860883917 (10.489125480251131, 7)
Current State,action,reward,Response time,Next State:  (7, 10.489125480251131) 4 21.0 1029.47716098 (10.448897752470936, 9)
Current State,action,reward,Response time,Next State:  (9, 10.448897752470936) 2 21.0 970.065772031 (10.433149880183072, 9)
Current State,action,reward,Response time,Next State:  (9, 10.433149880183072) 3 20.0 969.241448633 (10.44185150623065, 10)
Current State,action,reward,Response time,Next State:  (10, 10.44185150623065) 0 22.0 931.538941947 (10.370942817486826, 8)
Current State,action,reward,Response time,Next State:  (8, 10.370942817486826) 2 22.0 977.171106925 (10.42733414151318, 8)
Current State,action,reward,Response time,Next State:  (8, 10.42733414151318) 3 21.0 980.466886297 (10.388469398680568, 9)
Current State,action,reward,Response time,Next State:  (9, 10.388469398680568) 2 21.0 966.902645924 (10.344006106602812, 9)
Current State,action,reward,Response time,Next State:  (9, 10.344006106602812) 4 19.0 964.575212011 (10.319026962956018, 11)
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 2 19.0 913.381595845 (10.30224719189987, 11)
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 4 19.0 912.494916918 (10.278181486298042, 11)
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 2 19.0 911.223233653 (10.268274366284802, 11)
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 1 20.0 910.69972028 (10.335411397720526, 10)
Current State,action,reward,Response time,Next State:  (10, 10.335411397720526) 3 19.0 925.892923039 (10.305649118067803, 11)
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 2 19.0 912.67468196 (10.24826025489064, 11)
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 4 19.0 909.642131904 (10.276491935146446, 11)
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 4 19.0 911.133954163 (10.236991269871366, 11)
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 2 19.0 909.046654676 (10.236272697871373, 11)
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 2 19.0 909.008683798 (10.369891240151098, 11)
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 2 19.0 916.069372847 (10.316955310454549, 11)
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 2 19.0 913.272125304 (10.333617326102203, 11)
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 0 21.0 914.152581784 (10.390165524255663, 9)
Current State,action,reward,Response time,Next State:  (9, 10.390165524255663) 1 22.0 966.991429728 (10.425974763084863, 8)
Current State,action,reward,Response time,Next State:  (8, 10.425974763084863) 2 22.0 980.387437704 (10.546025383098053, 8)
Current State,action,reward,Response time,Next State:  (8, 10.546025383098053) 4 20.0 987.40377158 (10.655373370049301, 10)
Action +2 not possible so Scaled up by 1
Current State,action,reward,Response time,Next State:  (10, 10.655373370049301) 4 19.0 942.865015335 (10.624473674922116, 11)
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 2 19.0 929.522052234 (10.771376986314287, 11)
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 0 21.0 937.284736847 (10.924797168745895, 9)
Current State,action,reward,Response time,Next State:  (9, 10.924797168745895) 4 19.0 994.97675791 (11.039747673816453, 11)
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 1 20.0 963.716106332 (11.670334358779868, 10)
Current State,action,reward,Response time,Next State:  (10, 11.670334358779868) 3 19.0 996.702699398 (11.819721938468785, 11)
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 0 21.0 1028.70793389 (13.168618569876575, 9)
Current State,action,reward,Response time,Next State:  (9, 13.168618569876575) 3 20.0 1112.429735 (13.649658108197247, 10)
Current State,action,reward,Response time,Next State:  (10, 13.649658108197247) 1 21.0 1101.69413046 (14.283719188889453, 9)
Current State,action,reward,Response time,Next State:  (9, 14.283719188889453) 0 23.0 1170.79974938 (14.677479537099185, 7)
Current State,action,reward,Response time,Next State:  (7, 14.677479537099185) 2 23.0 1290.48169407 (15.353965082180355, 7)
Current State,action,reward,Response time,Next State:  (7, 15.353965082180355) 2 23.0 1332.63806181 (15.836943704090487, 7)
Current State,action,reward,Response time,Next State:  (7, 15.836943704090487) 3 22.0 1362.73571067 (16.466876895473597, 8)
Current State,action,reward,Response time,Next State:  (8, 16.466876895473597) 1 23.0 1333.44672445 (16.871606159345866, 7)
Current State,action,reward,Response time,Next State:  (7, 16.871606159345866) 4 21.0 1427.21249257 (17.534967586021782, 9)
Current State,action,reward,Response time,Next State:  (9, 17.534967586021782) 1 22.0 1340.98655806 (17.669285735563751, 8)
Current State,action,reward,Response time,Next State:  (8, 17.669285735563751) 4 20.0 1403.72126261 (17.944480812078613, 10)
Current State,action,reward,Response time,Next State:  (10, 17.944480812078613) 3 19.0 1329.50910109 (18.385807405229915, 11)
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 4 19.0 1354.73183582 (19.02839494033929, 11)
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 2 19.0 1373.60319427 (19.286321916040979, 11)
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 2 19.0 1387.23260634 (19.340464848017284, 11)
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 4 19.0 1390.09363446 (19.213467265587269, 11)
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 2 19.0 1379.54110953 (19.385636054792762, 11)
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 0 21.0 1392.48057747 (19.223969507401588, 9)
Current State,action,reward,Response time,Next State:  (9, 19.223969507401588) 4 19.0 1429.39747342 (19.25591252280865, 11)
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 0 21.0 1376.52055872 (18.668181536495972, 9)
Current State,action,reward,Response time,Next State:  (9, 18.668181536495972) 2 21.0 1400.30471596 (18.375894992990247, 9)
Current State,action,reward,Response time,Next State:  (9, 18.375894992990247) 2 21.0 1385.00495784 (17.82724819986867, 9)
Current State,action,reward,Response time,Next State:  (9, 17.82724819986867) 2 21.0 1356.28600579 (17.229782241685768, 9)
Current State,action,reward,Response time,Next State:  (9, 17.229782241685768) 3 20.0 1325.01161138 (16.84211602880065, 10)
Current State,action,reward,Response time,Next State:  (10, 16.84211602880065) 2 20.0 1271.03516211 (16.237094554670044, 10)
Current State,action,reward,Response time,Next State:  (10, 16.237094554670044) 2 20.0 1238.94234737 (15.950694610794756, 10)
Current State,action,reward,Response time,Next State:  (10, 15.950694610794756) 0 22.0 1223.7505224 (15.828704162850809, 8)
Current State,action,reward,Response time,Next State:  (8, 15.828704162850809) 1 23.0 1296.14884991 (15.550833128512703, 7)
Current State,action,reward,Response time,Next State:  (7, 15.550833128512703) 4 21.0 1344.9062349 (15.446694946204717, 9)
Current State,action,reward,Response time,Next State:  (9, 15.446694946204717) 0 23.0 1231.67579099 (15.750501603468638, 7)
Current State,action,reward,Response time,Next State:  (7, 15.750501603468638) 0 25.0 1357.34892154 (15.817158911312735, 5)
Current State,action,reward,Response time,Next State:  (5, 15.817158911312735) 3 24.0 1710.88049068 (15.829956988360925, 6)
Current State,action,reward,Response time,Next State:  (6, 15.829956988360925) 4 22.0 1444.37807323 (15.892373986997768, 8)
Current State,action,reward,Response time,Next State:  (8, 15.892373986997768) 3 21.0 1299.87001973 (15.954793861767499, 9)
Current State,action,reward,Response time,Next State:  (9, 15.954793861767499) 2 21.0 1258.27226176 (16.004586266677634, 9)
Current State,action,reward,Response time,Next State:  (9, 16.004586266677634) 3 20.0 1260.87864843 (16.017694914042416, 10)
Current State,action,reward,Response time,Next State:  (10, 16.017694914042416) 1 21.0 1227.30449265 (15.947547279389703, 9)
Current State,action,reward,Response time,Next State:  (9, 15.947547279389703) 4 19.0 1257.89293893 (16.11465619633363, 11)
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 0 21.0 1225.69057988 (16.295120821876548, 9)
Current State,action,reward,Response time,Next State:  (9, 16.295120821876548) 3 20.0 1276.0866986 (16.667936385136993, 10)
Action +2 not possible so Scaled up by 1
Current State,action,reward,Response time,Next State:  (10, 16.667936385136993) 4 19.0 1261.79596106 (16.836383524612351, 11)
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 2 19.0 1269.21706044 (17.215992726625572, 11)
############ Running episode number: 11  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 2395.27001953125
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 4 19.0 1000.80824137 (11.786394321941378, 11)
loss 2274.22705078125
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 1 20.0 990.920419923 (11.61852219546234, 10)
loss 3200.767578125
Current State,action,reward,Response time,Next State:  (10, 11.61852219546234) 2 20.0 993.95437024 (11.469111876584304, 10)
loss 2417.70361328125
Current State,action,reward,Response time,Next State:  (10, 11.469111876584304) 3 19.0 986.02903554 (11.336751742492702, 11)
loss 2062.160400390625
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 4 19.0 967.160346038 (11.25610796929319, 11)
loss 1177.9200439453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 3236.453125
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 1 20.0 950.798097136 (10.995673623987257, 10)
loss 2766.39794921875
Current State,action,reward,Response time,Next State:  (10, 10.995673623987257) 0 22.0 960.915933313 (10.931193889570471, 8)
loss 1956.13427734375
Current State,action,reward,Response time,Next State:  (8, 10.931193889570471) 4 20.0 1009.91486598 (10.816918347608043, 10)
loss 2001.7410888671875
Current State,action,reward,Response time,Next State:  (10, 10.816918347608043) 2 20.0 951.434021987 (10.819208572963639, 10)
loss 2306.063232421875
Current State,action,reward,Response time,Next State:  (10, 10.819208572963639) 1 21.0 951.555504911 (10.768325938188134, 9)
loss 5161.89404296875
Current State,action,reward,Response time,Next State:  (9, 10.768325938188134) 3 20.0 986.786261176 (10.772009508959538, 10)
loss 4474.54150390625
Current State,action,reward,Response time,Next State:  (10, 10.772009508959538) 1 21.0 949.051873418 (10.644925616761762, 9)
loss 3076.9892578125
Current State,action,reward,Response time,Next State:  (9, 10.644925616761762) 2 21.0 980.32686333 (10.58735855349979, 9)
loss 2513.792236328125
Current State,action,reward,Response time,Next State:  (9, 10.58735855349979) 0 23.0 977.313511661 (10.552868829802469, 7)
loss 3432.84326171875
Current State,action,reward,Response time,Next State:  (7, 10.552868829802469) 2 23.0 1033.44943815 (10.553846649940214, 7)
loss 5148.7548828125
Current State,action,reward,Response time,Next State:  (7, 10.553846649940214) 3 22.0 1033.5103727 (10.489125480251131, 8)
loss 4109.78564453125
Current State,action,reward,Response time,Next State:  (8, 10.489125480251131) 3 21.0 984.078268423 (10.448897752470936, 9)
loss 992.705322265625
Current State,action,reward,Response time,Next State:  (9, 10.448897752470936) 4 19.0 970.065772031 (10.433149880183072, 11)
loss 1843.656494140625
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 0 21.0 919.412094444 (10.44185150623065, 9)
loss 4477.818359375
Current State,action,reward,Response time,Next State:  (9, 10.44185150623065) 3 20.0 969.696935814 (10.370942817486826, 10)
loss 2049.312255859375
Current State,action,reward,Response time,Next State:  (10, 10.370942817486826) 3 19.0 927.777654938 (10.42733414151318, 11)
loss 3060.7509765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 2742.23974609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 2009.15478515625
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 0 21.0 914.701547126 (10.319026962956018, 9)
loss 1631.553955078125
Current State,action,reward,Response time,Next State:  (9, 10.319026962956018) 3 20.0 963.267677113 (10.30224719189987, 10)
loss 1488.400634765625
Current State,action,reward,Response time,Next State:  (10, 10.30224719189987) 2 20.0 924.133757854 (10.278181486298042, 10)
loss 3564.76806640625
Current State,action,reward,Response time,Next State:  (10, 10.278181486298042) 3 19.0 922.857214352 (10.268274366284802, 11)
loss 1265.5859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 2443.250732421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 3438.85791015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 2245.1650390625
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 1 20.0 909.642131904 (10.276491935146446, 10)
loss 1290.9798583984375
Current State,action,reward,Response time,Next State:  (10, 10.276491935146446) 2 20.0 922.767593645 (10.236991269871366, 10)
loss 4185.41357421875
Current State,action,reward,Response time,Next State:  (10, 10.236991269871366) 0 22.0 920.672316722 (10.236272697871373, 8)
loss 2870.668212890625
Current State,action,reward,Response time,Next State:  (8, 10.236272697871373) 2 22.0 969.300339391 (10.369891240151098, 8)
loss 2731.560546875
Current State,action,reward,Response time,Next State:  (8, 10.369891240151098) 4 20.0 977.109647703 (10.316955310454549, 10)
loss 3811.281005859375
Current State,action,reward,Response time,Next State:  (10, 10.316955310454549) 0 22.0 924.913936648 (10.333617326102203, 8)
loss 3661.59814453125
Current State,action,reward,Response time,Next State:  (8, 10.333617326102203) 0 24.0 974.989626232 (10.390165524255663, 6)
loss 3798.64990234375
Current State,action,reward,Response time,Next State:  (6, 10.390165524255663) 3 23.0 1084.59512897 (10.425974763084863, 7)
loss 3317.1767578125
Current State,action,reward,Response time,Next State:  (7, 10.425974763084863) 1 24.0 1025.54181472 (10.546025383098053, 6)
loss 3521.926025390625
Current State,action,reward,Response time,Next State:  (6, 10.546025383098053) 3 23.0 1094.90356069 (10.655373370049301, 7)
loss 3417.1142578125
Current State,action,reward,Response time,Next State:  (7, 10.655373370049301) 3 22.0 1039.83718601 (10.624473674922116, 8)
loss 2278.9091796875
Current State,action,reward,Response time,Next State:  (8, 10.624473674922116) 1 23.0 991.988665914 (10.771376986314287, 7)
loss 2619.03271484375
Current State,action,reward,Response time,Next State:  (7, 10.771376986314287) 3 22.0 1047.06615216 (10.924797168745895, 8)
loss 2338.943603515625
Current State,action,reward,Response time,Next State:  (8, 10.924797168745895) 0 24.0 1009.54101094 (11.039747673816453, 6)
loss 4661.4169921875
Current State,action,reward,Response time,Next State:  (6, 11.039747673816453) 3 23.0 1127.55791161 (11.271571944085663, 7)
loss 2528.072265625
Current State,action,reward,Response time,Next State:  (7, 11.271571944085663) 1 24.0 1078.23666679 (11.670334358779868, 6)
loss 1613.968017578125
Current State,action,reward,Response time,Next State:  (6, 11.670334358779868) 0 26.0 1169.26435121 (11.819721938468785, 4)
loss 2766.86376953125
Current State,action,reward,Response time,Next State:  (4, 11.819721938468785) 3 25.0 1527.84217079 (12.19918626616789, 5)
loss 1906.6396484375
Current State,action,reward,Response time,Next State:  (5, 12.19918626616789) 1 26.0 1378.45312853 (12.501496275411796, 4)
loss 3251.005126953125
Current State,action,reward,Response time,Next State:  (4, 12.501496275411796) 0 -147.63992904 1605.29472846 (13.168618569876575, 2)
loss 2007.1483154296875
Current State,action,reward,Response time,Next State:  (2, 13.168618569876575) 3 -31.4918364219 3756.3992904 (13.649658108197247, 3)
loss 4210.0439453125
Current State,action,reward,Response time,Next State:  (3, 13.649658108197247) 4 25.0 2584.91836422 (14.283719188889453, 5)
loss 4270.5625
Current State,action,reward,Response time,Next State:  (5, 14.283719188889453) 1 26.0 1569.98464322 (14.677479537099185, 4)
loss 2264.291259765625
Current State,action,reward,Response time,Next State:  (4, 14.677479537099185) 3 25.0 1852.49597955 (15.353965082180355, 5)
loss 2335.305908203125
Current State,action,reward,Response time,Next State:  (5, 15.353965082180355) 1 26.0 1668.32121291 (15.836943704090487, 4)
loss 2322.295166015625
Current State,action,reward,Response time,Next State:  (4, 15.836943704090487) 1 -86.8984248291 1984.21619897 (16.466876895473597, 3)
loss 2582.07568359375
Current State,action,reward,Response time,Next State:  (3, 16.466876895473597) 3 15.8241603947 3138.98424829 (16.871606159345866, 4)
loss 3712.3837890625
Current State,action,reward,Response time,Next State:  (4, 16.871606159345866) 2 8.28808337202 2101.75839605 (17.534967586021782, 4)
loss 4743.68408203125
Current State,action,reward,Response time,Next State:  (4, 17.534967586021782) 2 6.76217022756 2177.11916628 (17.669285735563751, 4)
loss 2730.143798828125
Current State,action,reward,Response time,Next State:  (4, 17.669285735563751) 2 3.63583334771 2192.37829772 (17.944480812078613, 4)
loss 2421.93017578125
Current State,action,reward,Response time,Next State:  (4, 17.944480812078613) 4 24.0 2223.64166652 (18.385807405229915, 6)
loss 3809.155517578125
Current State,action,reward,Response time,Next State:  (6, 18.385807405229915) 2 24.0 1613.41973487 (18.671267839956315, 6)
loss 3130.408935546875
Current State,action,reward,Response time,Next State:  (6, 18.671267839956315) 3 23.0 1632.29983282 (19.02839494033929, 7)
loss 2597.193603515625
Current State,action,reward,Response time,Next State:  (7, 19.02839494033929) 1 24.0 1561.61651886 (19.286321916040979, 6)
loss 3207.26953125
Current State,action,reward,Response time,Next State:  (6, 19.286321916040979) 0 -12.2231581375 1672.97895956 (19.340464848017284, 4)
loss 1342.8560791015625
Current State,action,reward,Response time,Next State:  (4, 19.340464848017284) 3 22.7059154527 2382.23158138 (19.213467265587269, 5)
loss 1797.3199462890625
Current State,action,reward,Response time,Next State:  (5, 19.213467265587269) 1 -9.95448909562 2022.94084547 (19.140765783401285, 4)
loss 1897.53955078125
Current State,action,reward,Response time,Next State:  (4, 19.140765783401285) 3 21.1239903788 2359.54489096 (19.385636054792762, 5)
loss 2063.138671875
Current State,action,reward,Response time,Next State:  (5, 19.385636054792762) 2 22.6094185224 2038.76009621 (19.223969507401588, 5)
loss 1750.241943359375
Current State,action,reward,Response time,Next State:  (5, 19.223969507401588) 3 24.0 2023.90581478 (19.25591252280865, 6)
loss 1865.8211669921875
Current State,action,reward,Response time,Next State:  (6, 19.25591252280865) 1 23.8991280022 1670.96770947 (19.08360399753829, 5)
loss 922.2825317382812
Current State,action,reward,Response time,Next State:  (5, 19.08360399753829) 2 25.0 2011.00871998 (18.668181536495972, 5)
loss 2828.907958984375
Current State,action,reward,Response time,Next State:  (5, 18.668181536495972) 0 -124.443318562 1972.8387816 (18.375894992990247, 3)
loss 2874.829833984375
Current State,action,reward,Response time,Next State:  (3, 18.375894992990247) 2 -113.653014598 3514.43318562 (17.82724819986867, 3)
loss 2289.333984375
Current State,action,reward,Response time,Next State:  (3, 17.82724819986867) 2 -101.90257811 3406.53014598 (17.229782241685768, 3)
loss 2478.299560546875
Action -2 not possible so Scaled down by 1
Current State,action,reward,Response time,Next State:  (3, 17.229782241685768) 0 -261.879458741 3289.0257811 (16.84211602880065, 2)
loss 1727.0704345703125
Action -2 not possible so Scaled down by 0
Current State,action,reward,Response time,Next State:  (2, 16.84211602880065) 1 -243.064318827 4898.79458741 (16.237094554670044, 2)
loss 704.453857421875
Current State,action,reward,Response time,Next State:  (2, 16.237094554670044) 2 -234.157767166 4710.64318827 (15.950694610794756, 2)
loss 2459.568603515625
Action -2 not possible so Scaled down by 0
Current State,action,reward,Response time,Next State:  (2, 15.950694610794756) 1 -230.364071496 4621.57767166 (15.828704162850809, 2)
loss 2203.9375
Current State,action,reward,Response time,Next State:  (2, 15.828704162850809) 3 -68.8824793909 4583.64071496 (15.550833128512703, 3)
loss 2778.3134765625
Current State,action,reward,Response time,Next State:  (3, 15.550833128512703) 4 25.0 2958.82479391 (15.446694946204717, 5)
loss 1972.053466796875
Current State,action,reward,Response time,Next State:  (5, 15.446694946204717) 1 26.0 1676.84143877 (15.750501603468638, 4)
loss 2494.72265625
Current State,action,reward,Response time,Next State:  (4, 15.750501603468638) 0 -230.005033789 1974.39599686 (15.817158911312735, 2)
loss 3245.611572265625
Current State,action,reward,Response time,Next State:  (2, 15.817158911312735) 3 -74.3720426748 4580.05033789 (15.829956988360925, 3)
loss 3304.658203125
Current State,action,reward,Response time,Next State:  (3, 15.829956988360925) 1 -232.344094852 3013.72042675 (15.892373986997768, 2)
loss 2388.363037109375
Current State,action,reward,Response time,Next State:  (2, 15.892373986997768) 4 26.0 4603.44094852 (15.954793861767499, 4)
loss 2775.4140625
Current State,action,reward,Response time,Next State:  (4, 15.954793861767499) 2 25.6738869972 1997.60449424 (16.004586266677634, 4)
loss 1825.8912353515625
Current State,action,reward,Response time,Next State:  (4, 16.004586266677634) 4 24.0 2003.26113003 (16.017694914042416, 6)
loss 2311.877685546875
Current State,action,reward,Response time,Next State:  (6, 16.017694914042416) 0 26.0 1456.7948918 (15.947547279389703, 4)
loss 2422.779541015625
Current State,action,reward,Response time,Next State:  (4, 15.947547279389703) 4 24.0 1996.78125067 (16.11465619633363, 6)
loss 1349.64013671875
Current State,action,reward,Response time,Next State:  (6, 16.11465619633363) 3 23.0 1463.20782432 (16.147078378791146, 7)
loss 4882.97509765625
Current State,action,reward,Response time,Next State:  (7, 16.147078378791146) 3 22.0 1382.06228977 (16.229253414601111, 8)
loss 2159.5087890625
Current State,action,reward,Response time,Next State:  (8, 16.229253414601111) 4 20.0 1319.55886882 (16.295120821876548, 10)
loss 3056.700927734375
Current State,action,reward,Response time,Next State:  (10, 16.295120821876548) 3 19.0 1242.02029803 (16.667936385136993, 11)
loss 3736.37646484375
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 0 21.0 1248.87152463 (16.836383524612351, 9)
loss 2469.412109375
Current State,action,reward,Response time,Next State:  (9, 16.836383524612351) 2 21.0 1304.41912996 (16.845818065953559, 9)
loss 3489.480224609375
Current State,action,reward,Response time,Next State:  (9, 16.845818065953559) 0 23.0 1304.91298164 (17.052961248403161, 7)
loss 3234.235595703125
Current State,action,reward,Response time,Next State:  (7, 17.052961248403161) 2 23.0 1438.51394887 (17.215992726625572, 7)
loss 1235.0887451171875
############ Running episode number: 12  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 2920.408203125
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 2 19.0 1000.80824137 (11.786394321941378, 11)
loss 4852.142578125
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 0 21.0 990.920419923 (11.61852219546234, 9)
loss 2672.062255859375
Current State,action,reward,Response time,Next State:  (9, 11.61852219546234) 3 20.0 1031.28983953 (11.469111876584304, 10)
loss 5569.26220703125
Current State,action,reward,Response time,Next State:  (10, 11.469111876584304) 0 22.0 986.02903554 (11.336751742492702, 8)
loss 2070.6455078125
Current State,action,reward,Response time,Next State:  (8, 11.336751742492702) 3 21.0 1033.61761156 (11.25610796929319, 9)
loss 2760.5302734375
Current State,action,reward,Response time,Next State:  (9, 11.25610796929319) 3 20.0 1012.3192433 (11.027107764209074, 10)
loss 1894.6009521484375
Action +2 not possible so Scaled up by 1
Current State,action,reward,Response time,Next State:  (10, 11.027107764209074) 4 19.0 962.583328739 (10.995673623987257, 11)
loss 4376.44091796875
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 4 19.0 949.137050055 (10.931193889570471, 11)
loss 2564.3955078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 3329.259521484375
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 1 20.0 939.691239608 (10.819208572963639, 10)
loss 1426.2550048828125
Action +2 not possible so Scaled up by 1
Current State,action,reward,Response time,Next State:  (10, 10.819208572963639) 4 19.0 951.555504911 (10.768325938188134, 11)
loss 1372.8468017578125
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 0 21.0 937.12351295 (10.772009508959538, 9)
loss 2107.9560546875
Current State,action,reward,Response time,Next State:  (9, 10.772009508959538) 1 22.0 986.979077927 (10.644925616761762, 8)
loss 3215.244873046875
Current State,action,reward,Response time,Next State:  (8, 10.644925616761762) 3 21.0 993.183975462 (10.58735855349979, 9)
loss 2827.858154296875
Current State,action,reward,Response time,Next State:  (9, 10.58735855349979) 3 20.0 977.313511661 (10.552868829802469, 10)
loss 3577.140625
Current State,action,reward,Response time,Next State:  (10, 10.552868829802469) 1 21.0 937.427755072 (10.553846649940214, 9)
loss 1639.417724609375
Current State,action,reward,Response time,Next State:  (9, 10.553846649940214) 3 20.0 975.559328891 (10.489125480251131, 10)
loss 1509.02197265625
Current State,action,reward,Response time,Next State:  (10, 10.489125480251131) 2 20.0 934.046546974 (10.448897752470936, 10)
loss 2957.948486328125
Current State,action,reward,Response time,Next State:  (10, 10.448897752470936) 0 22.0 931.912703681 (10.433149880183072, 8)
loss 1145.9520263671875
Current State,action,reward,Response time,Next State:  (8, 10.433149880183072) 0 24.0 980.806785952 (10.44185150623065, 6)
loss 4572.88427734375
Current State,action,reward,Response time,Next State:  (6, 10.44185150623065) 4 22.0 1088.01359361 (10.370942817486826, 8)
loss 4740.841796875
Current State,action,reward,Response time,Next State:  (8, 10.370942817486826) 3 21.0 977.171106925 (10.42733414151318, 9)
loss 3336.524658203125
Current State,action,reward,Response time,Next State:  (9, 10.42733414151318) 2 21.0 968.937023414 (10.388469398680568, 9)
loss 1952.278564453125
Current State,action,reward,Response time,Next State:  (9, 10.388469398680568) 4 19.0 966.902645924 (10.344006106602812, 11)
loss 2860.2392578125
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 0 21.0 914.701547126 (10.319026962956018, 9)
loss 3603.958251953125
Current State,action,reward,Response time,Next State:  (9, 10.319026962956018) 4 19.0 963.267677113 (10.30224719189987, 11)
loss 4750.20263671875
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 1 20.0 912.494916918 (10.278181486298042, 10)
loss 4176.25244140625
Current State,action,reward,Response time,Next State:  (10, 10.278181486298042) 1 21.0 922.857214352 (10.268274366284802, 9)
loss 2183.310302734375
Current State,action,reward,Response time,Next State:  (9, 10.268274366284802) 2 21.0 960.611029141 (10.335411397720526, 9)
loss 1207.0869140625
Current State,action,reward,Response time,Next State:  (9, 10.335411397720526) 1 22.0 964.125321415 (10.305649118067803, 8)
loss 2401.001220703125
Current State,action,reward,Response time,Next State:  (8, 10.305649118067803) 1 23.0 973.355030047 (10.24826025489064, 7)
loss 1732.56298828125
Current State,action,reward,Response time,Next State:  (7, 10.24826025489064) 3 22.0 1014.46722752 (10.276491935146446, 8)
loss 4364.3310546875
Current State,action,reward,Response time,Next State:  (8, 10.276491935146446) 4 20.0 971.650944469 (10.236991269871366, 10)
loss 1325.508056640625
Current State,action,reward,Response time,Next State:  (10, 10.236991269871366) 2 20.0 920.672316722 (10.236272697871373, 10)
loss 3103.03955078125
Current State,action,reward,Response time,Next State:  (10, 10.236272697871373) 1 21.0 920.634200723 (10.369891240151098, 9)
loss 4613.46142578125
Current State,action,reward,Response time,Next State:  (9, 10.369891240151098) 2 21.0 965.930171009 (10.316955310454549, 9)
loss 2026.7830810546875
Current State,action,reward,Response time,Next State:  (9, 10.316955310454549) 3 20.0 963.159236328 (10.333617326102203, 10)
loss 897.5078125
Current State,action,reward,Response time,Next State:  (10, 10.333617326102203) 1 21.0 925.797758139 (10.390165524255663, 9)
loss 2088.5595703125
Current State,action,reward,Response time,Next State:  (9, 10.390165524255663) 3 20.0 966.991429728 (10.425974763084863, 10)
loss 1912.7708740234375
Current State,action,reward,Response time,Next State:  (10, 10.425974763084863) 1 21.0 930.696774523 (10.546025383098053, 9)
loss 1603.15966796875
Current State,action,reward,Response time,Next State:  (9, 10.546025383098053) 4 19.0 975.14992417 (10.655373370049301, 11)
loss 3224.03564453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 1388.0677490234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 2172.28125
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 4 19.0 937.284736847 (10.924797168745895, 11)
loss 3383.99560546875
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 2 19.0 945.391786838 (11.039747673816453, 11)
loss 7090.09765625
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 0 21.0 951.466016946 (11.271571944085663, 9)
loss 2391.84765625
Current State,action,reward,Response time,Next State:  (9, 11.271571944085663) 3 20.0 1013.12870607 (11.670334358779868, 10)
loss 3441.1083984375
Current State,action,reward,Response time,Next State:  (10, 11.670334358779868) 3 19.0 996.702699398 (11.819721938468785, 11)
loss 1949.5845947265625
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 2 19.0 992.681522335 (12.19918626616789, 11)
loss 2294.2099609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 1430.0621337890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 2427.83740234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 863.268798828125
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 4 19.0 1089.37925646 (14.283719188889453, 11)
loss 2194.8740234375
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 0 21.0 1122.88439768 (14.677479537099185, 9)
loss 5299.0625
Current State,action,reward,Response time,Next State:  (9, 14.677479537099185) 3 20.0 1191.41116041 (15.353965082180355, 10)
loss 4050.0703125
Current State,action,reward,Response time,Next State:  (10, 15.353965082180355) 3 19.0 1192.09754638 (15.836943704090487, 11)
loss 1064.2073974609375
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 2 19.0 1204.9600972 (16.466876895473597, 11)
loss 1216.68310546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 3642.043212890625
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 1 20.0 1259.63387034 (17.534967586021782, 10)
loss 1655.1195068359375
Current State,action,reward,Response time,Next State:  (10, 17.534967586021782) 1 21.0 1307.78684385 (17.669285735563751, 9)
loss 3462.733642578125
Current State,action,reward,Response time,Next State:  (9, 17.669285735563751) 1 22.0 1348.01745033 (17.944480812078613, 8)
loss 1897.579345703125
Current State,action,reward,Response time,Next State:  (8, 17.944480812078613) 0 24.0 1419.80498244 (18.385807405229915, 6)
loss 3498.549072265625
Current State,action,reward,Response time,Next State:  (6, 18.385807405229915) 1 25.0 1613.41973487 (18.671267839956315, 5)
loss 5252.5244140625
Current State,action,reward,Response time,Next State:  (5, 18.671267839956315) 0 -137.276115284 1973.12235802 (19.02839494033929, 3)
loss 1682.9849853515625
Action -2 not possible so Scaled down by 1
Current State,action,reward,Response time,Next State:  (3, 19.02839494033929) 0 -337.890109128 3642.76115284 (19.286321916040979, 2)
loss 2984.485595703125
Current State,action,reward,Response time,Next State:  (2, 19.286321916040979) 4 -12.2231581375 5658.90109128 (19.340464848017284, 4)
loss 2778.29052734375
Current State,action,reward,Response time,Next State:  (4, 19.340464848017284) 4 24.0 2382.23158138 (19.213467265587269, 6)
loss 3097.4736328125
Current State,action,reward,Response time,Next State:  (6, 19.213467265587269) 4 22.0 1668.16041811 (19.140765783401285, 8)
loss 3181.448486328125
Current State,action,reward,Response time,Next State:  (8, 19.140765783401285) 4 20.0 1489.72161235 (19.385636054792762, 10)
loss 1840.6390380859375
Action +2 not possible so Scaled up by 1
Current State,action,reward,Response time,Next State:  (10, 19.385636054792762) 4 19.0 1405.95387237 (19.223969507401588, 11)
loss 2597.4921875
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 1 20.0 1383.93777195 (19.25591252280865, 10)
loss 1672.489013671875
Current State,action,reward,Response time,Next State:  (10, 19.25591252280865) 1 21.0 1399.0728054 (19.08360399753829, 9)
loss 885.2801513671875
Current State,action,reward,Response time,Next State:  (9, 19.08360399753829) 0 23.0 1422.05003169 (18.668181536495972, 7)
loss 3436.827880859375
Current State,action,reward,Response time,Next State:  (7, 18.668181536495972) 0 25.0 1539.16919707 (18.375894992990247, 5)
loss 3129.50537109375
Current State,action,reward,Response time,Next State:  (5, 18.375894992990247) 3 24.0 1945.98284482 (17.82724819986867, 6)
loss 3004.393310546875
Current State,action,reward,Response time,Next State:  (6, 17.82724819986867) 2 24.0 1576.47712838 (17.229782241685768, 6)
loss 3035.35400390625
Current State,action,reward,Response time,Next State:  (6, 17.229782241685768) 3 23.0 1536.96126404 (16.84211602880065, 7)
loss 2799.417724609375
Current State,action,reward,Response time,Next State:  (7, 16.84211602880065) 1 24.0 1425.37476404 (16.237094554670044, 6)
loss 3117.8583984375
Current State,action,reward,Response time,Next State:  (6, 16.237094554670044) 0 26.0 1471.30578788 (15.950694610794756, 4)
loss 2596.7900390625
Current State,action,reward,Response time,Next State:  (4, 15.950694610794756) 4 24.0 1997.13880134 (15.828704162850809, 6)
loss 836.6265869140625
Current State,action,reward,Response time,Next State:  (6, 15.828704162850809) 3 23.0 1444.29521247 (15.550833128512703, 7)
loss 2607.818115234375
Current State,action,reward,Response time,Next State:  (7, 15.550833128512703) 1 24.0 1344.9062349 (15.446694946204717, 6)
loss 4117.87060546875
Current State,action,reward,Response time,Next State:  (6, 15.446694946204717) 3 23.0 1419.0294644 (15.750501603468638, 7)
loss 4860.78173828125
Current State,action,reward,Response time,Next State:  (7, 15.750501603468638) 0 25.0 1357.34892154 (15.817158911312735, 5)
loss 2764.807861328125
Current State,action,reward,Response time,Next State:  (5, 15.817158911312735) 1 26.0 1710.88049068 (15.829956988360925, 4)
loss 4065.650146484375
Current State,action,reward,Response time,Next State:  (4, 15.829956988360925) 2 26.0 1983.42247739 (15.892373986997768, 4)
loss 1448.0457763671875
Current State,action,reward,Response time,Next State:  (4, 15.892373986997768) 2 26.0 1990.51332244 (15.954793861767499, 4)
loss 3701.3505859375
Current State,action,reward,Response time,Next State:  (4, 15.954793861767499) 1 -77.8064981709 1997.60449424 (16.004586266677634, 3)
loss 3749.55419921875
Action -2 not possible so Scaled down by 1
Current State,action,reward,Response time,Next State:  (3, 16.004586266677634) 0 -236.241362763 3048.06498171 (16.017694914042416, 2)
loss 4108.603515625
Current State,action,reward,Response time,Next State:  (2, 16.017694914042416) 4 26.0 4642.41362763 (15.947547279389703, 4)
loss 835.9898071289062
Current State,action,reward,Response time,Next State:  (4, 15.947547279389703) 3 25.0 1996.78125067 (16.11465619633363, 5)
loss 4289.388671875
Current State,action,reward,Response time,Next State:  (5, 16.11465619633363) 4 23.0 1738.21520197 (16.147078378791146, 7)
loss 2004.8148193359375
Current State,action,reward,Response time,Next State:  (7, 16.147078378791146) 2 23.0 1382.06228977 (16.229253414601111, 7)
loss 4189.01171875
Current State,action,reward,Response time,Next State:  (7, 16.229253414601111) 0 25.0 1387.18316937 (16.295120821876548, 5)
loss 3369.376220703125
Current State,action,reward,Response time,Next State:  (5, 16.295120821876548) 3 24.0 1754.79669258 (16.667936385136993, 6)
loss 5621.7265625
Current State,action,reward,Response time,Next State:  (6, 16.667936385136993) 1 25.0 1499.80128138 (16.836383524612351, 5)
loss 2100.0439453125
Current State,action,reward,Response time,Next State:  (5, 16.836383524612351) 3 24.0 1804.52911185 (16.845818065953559, 6)
loss 3177.720703125
Current State,action,reward,Response time,Next State:  (6, 16.845818065953559) 3 23.0 1511.56621672 (17.052961248403161, 7)
loss 4034.17431640625
Current State,action,reward,Response time,Next State:  (7, 17.052961248403161) 4 21.0 1438.51394887 (17.215992726625572, 9)
loss 5099.92431640625
############ Running episode number: 13  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 0 22.0 1029.06659875 (11.973514343585284, 8)
loss 2544.20556640625
Current State,action,reward,Response time,Next State:  (8, 11.973514343585284) 4 20.0 1070.83307124 (11.786394321941378, 10)
loss 1493.1923828125
Current State,action,reward,Response time,Next State:  (10, 11.786394321941378) 3 19.0 1002.85899476 (11.61852219546234, 11)
loss 3916.82763671875
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 1 20.0 982.049698353 (11.469111876584304, 10)
loss 6000.21142578125
Current State,action,reward,Response time,Next State:  (10, 11.469111876584304) 2 20.0 986.02903554 (11.336751742492702, 10)
loss 2102.77197265625
Action +2 not possible so Scaled up by 1
Current State,action,reward,Response time,Next State:  (10, 11.336751742492702) 4 19.0 979.00811241 (11.25610796929319, 11)
loss 4674.23583984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 3383.814697265625
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 4 19.0 950.798097136 (10.995673623987257, 11)
loss 2840.396240234375
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 4 19.0 949.137050055 (10.931193889570471, 11)
loss 5345.9931640625
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 0 21.0 945.729803224 (10.816918347608043, 9)
loss 4425.21240234375
Current State,action,reward,Response time,Next State:  (9, 10.816918347608043) 0 23.0 989.329834005 (10.819208572963639, 7)
loss 2611.49365234375
Current State,action,reward,Response time,Next State:  (7, 10.819208572963639) 4 21.0 1050.04686027 (10.768325938188134, 9)
loss 2872.445068359375
Current State,action,reward,Response time,Next State:  (9, 10.768325938188134) 2 21.0 986.786261176 (10.772009508959538, 9)
loss 3747.578857421875
Current State,action,reward,Response time,Next State:  (9, 10.772009508959538) 3 20.0 986.979077927 (10.644925616761762, 10)
loss 4536.8720703125
Current State,action,reward,Response time,Next State:  (10, 10.644925616761762) 3 19.0 942.310823749 (10.58735855349979, 11)
loss 3102.125732421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 3536.2001953125
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 4 19.0 925.738299342 (10.553846649940214, 11)
loss 4255.70556640625
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 2 19.0 925.789969445 (10.489125480251131, 11)
loss 3170.447265625
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 1 20.0 922.369964659 (10.448897752470936, 10)
loss 4994.65673828125
Current State,action,reward,Response time,Next State:  (10, 10.448897752470936) 0 22.0 931.912703681 (10.433149880183072, 8)
loss 1739.7803955078125
Current State,action,reward,Response time,Next State:  (8, 10.433149880183072) 0 24.0 980.806785952 (10.44185150623065, 6)
loss 5171.4697265625
Current State,action,reward,Response time,Next State:  (6, 10.44185150623065) 3 23.0 1088.01359361 (10.370942817486826, 7)
loss 4125.92333984375
Current State,action,reward,Response time,Next State:  (7, 10.370942817486826) 0 25.0 1022.11240377 (10.42733414151318, 5)
loss 3189.02880859375
Current State,action,reward,Response time,Next State:  (5, 10.42733414151318) 0 27.0 1215.65142003 (10.388469398680568, 3)
loss 4864.072265625
Current State,action,reward,Response time,Next State:  (3, 10.388469398680568) 2 27.0 1943.53636946 (10.344006106602812, 3)
loss 2784.337158203125
Current State,action,reward,Response time,Next State:  (3, 10.344006106602812) 1 -59.0224715211 1934.79171911 (10.319026962956018, 2)
loss 2643.7685546875
Current State,action,reward,Response time,Next State:  (2, 10.319026962956018) 4 26.0 2870.22471521 (10.30224719189987, 4)
loss 1867.5833740234375
Current State,action,reward,Response time,Next State:  (4, 10.30224719189987) 3 25.0 1355.45037786 (10.278181486298042, 5)
loss 3988.360595703125
Current State,action,reward,Response time,Next State:  (5, 10.278181486298042) 4 23.0 1201.9469428 (10.268274366284802, 7)
loss 3890.19921875
Current State,action,reward,Response time,Next State:  (7, 10.268274366284802) 4 21.0 1015.71444152 (10.335411397720526, 9)
loss 2478.254150390625
Current State,action,reward,Response time,Next State:  (9, 10.335411397720526) 3 20.0 964.125321415 (10.305649118067803, 10)
loss 2667.14697265625
Current State,action,reward,Response time,Next State:  (10, 10.305649118067803) 0 22.0 924.314209939 (10.24826025489064, 8)
loss 2857.2490234375
Current State,action,reward,Response time,Next State:  (8, 10.24826025489064) 1 23.0 970.000949704 (10.276491935146446, 7)
loss 1433.63525390625
Current State,action,reward,Response time,Next State:  (7, 10.276491935146446) 3 22.0 1016.22653355 (10.236991269871366, 8)
loss 2484.55126953125
Current State,action,reward,Response time,Next State:  (8, 10.236991269871366) 4 20.0 969.342336184 (10.236272697871373, 10)
loss 1905.4425048828125
Current State,action,reward,Response time,Next State:  (10, 10.236272697871373) 0 22.0 920.634200723 (10.369891240151098, 8)
loss 3413.818603515625
Current State,action,reward,Response time,Next State:  (8, 10.369891240151098) 3 21.0 977.109647703 (10.316955310454549, 9)
loss 3022.2265625
Current State,action,reward,Response time,Next State:  (9, 10.316955310454549) 1 22.0 963.159236328 (10.333617326102203, 8)
loss 1958.55615234375
Current State,action,reward,Response time,Next State:  (8, 10.333617326102203) 0 24.0 974.989626232 (10.390165524255663, 6)
loss 1413.6558837890625
Current State,action,reward,Response time,Next State:  (6, 10.390165524255663) 1 25.0 1084.59512897 (10.425974763084863, 5)
loss 737.9048461914062
Current State,action,reward,Response time,Next State:  (5, 10.425974763084863) 4 23.0 1215.52651732 (10.546025383098053, 7)
loss 3772.390380859375
Current State,action,reward,Response time,Next State:  (7, 10.546025383098053) 3 22.0 1033.02297692 (10.655373370049301, 8)
loss 2479.530517578125
Current State,action,reward,Response time,Next State:  (8, 10.655373370049301) 1 23.0 993.794592261 (10.624473674922116, 7)
loss 3485.99658203125
Current State,action,reward,Response time,Next State:  (7, 10.624473674922116) 4 21.0 1037.91161802 (10.771376986314287, 9)
loss 4588.8154296875
Current State,action,reward,Response time,Next State:  (9, 10.771376986314287) 0 23.0 986.945968488 (10.924797168745895, 7)
loss 4018.670166015625
Current State,action,reward,Response time,Next State:  (7, 10.924797168745895) 1 24.0 1056.62679639 (11.039747673816453, 6)
loss 4411.4658203125
Current State,action,reward,Response time,Next State:  (6, 11.039747673816453) 0 26.0 1127.55791161 (11.271571944085663, 4)
loss 2134.77099609375
Current State,action,reward,Response time,Next State:  (4, 11.271571944085663) 2 26.0 1465.56992505 (11.670334358779868, 4)
loss 3073.901611328125
Current State,action,reward,Response time,Next State:  (4, 11.670334358779868) 1 4.49774331034 1510.87108593 (11.819721938468785, 3)
loss 2886.915771484375
Action -2 not possible so Scaled down by 1
Current State,action,reward,Response time,Next State:  (3, 11.819721938468785) 0 -117.49223146 2225.0225669 (12.19918626616789, 2)
loss 2623.1220703125
Action -2 not possible so Scaled down by 0
Current State,action,reward,Response time,Next State:  (2, 12.19918626616789) 1 -126.893559123 3454.9223146 (12.501496275411796, 2)
loss 4384.45703125
Current State,action,reward,Response time,Next State:  (2, 12.501496275411796) 2 -147.63992904 3548.93559123 (13.168618569876575, 2)
loss 4787.173828125
Action -2 not possible so Scaled down by 0
Current State,action,reward,Response time,Next State:  (2, 13.168618569876575) 1 -162.599441396 3756.3992904 (13.649658108197247, 2)
loss 3123.16748046875
Current State,action,reward,Response time,Next State:  (2, 13.649658108197247) 3 -43.9619936853 3905.99441396 (14.283719188889453, 3)
loss 5155.57080078125
Current State,action,reward,Response time,Next State:  (3, 14.283719188889453) 3 26.0 2709.61993685 (14.677479537099185, 4)
loss 5821.021484375
Current State,action,reward,Response time,Next State:  (4, 14.677479537099185) 3 25.0 1852.49597955 (15.353965082180355, 5)
loss 3172.341796875
Current State,action,reward,Response time,Next State:  (5, 15.353965082180355) 1 26.0 1668.32121291 (15.836943704090487, 4)
loss 3061.16650390625
Current State,action,reward,Response time,Next State:  (4, 15.836943704090487) 4 24.0 1984.21619897 (16.466876895473597, 6)
loss 4731.2607421875
Current State,action,reward,Response time,Next State:  (6, 16.466876895473597) 4 22.0 1486.50338648 (16.871606159345866, 8)
loss 1441.9991455078125
Current State,action,reward,Response time,Next State:  (8, 16.871606159345866) 0 24.0 1357.1010433 (17.534967586021782, 6)
loss 2768.91796875
Current State,action,reward,Response time,Next State:  (6, 17.534967586021782) 4 22.0 1557.14594988 (17.669285735563751, 8)
loss 5394.12353515625
Current State,action,reward,Response time,Next State:  (8, 17.669285735563751) 3 21.0 1403.72126261 (17.944480812078613, 9)
loss 733.2040405273438
Current State,action,reward,Response time,Next State:  (9, 17.944480812078613) 1 22.0 1362.4225545 (18.385807405229915, 8)
loss 2472.982421875
Current State,action,reward,Response time,Next State:  (8, 18.385807405229915) 4 20.0 1445.59822471 (18.671267839956315, 10)
loss 4862.24755859375
Action +2 not possible so Scaled up by 1
Current State,action,reward,Response time,Next State:  (10, 18.671267839956315) 4 19.0 1368.06085906 (19.02839494033929, 11)
loss 2880.016845703125
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 4 19.0 1373.60319427 (19.286321916040979, 11)
loss 5427.3955078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 2109.708251953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 2125.261474609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 1370.0814208984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 2443.69384765625
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 1 20.0 1392.48057747 (19.223969507401588, 10)
loss 2603.82568359375
Current State,action,reward,Response time,Next State:  (10, 19.223969507401588) 3 19.0 1397.37841716 (19.25591252280865, 11)
loss 911.486572265625
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 1 20.0 1385.62570908 (19.08360399753829, 10)
loss 2844.99658203125
Current State,action,reward,Response time,Next State:  (10, 19.08360399753829) 2 20.0 1389.93285614 (18.668181536495972, 10)
loss 3054.574951171875
Current State,action,reward,Response time,Next State:  (10, 18.668181536495972) 0 22.0 1367.89714889 (18.375894992990247, 8)
loss 3063.5400390625
Current State,action,reward,Response time,Next State:  (8, 18.375894992990247) 1 23.0 1445.01889581 (17.82724819986867, 7)
loss 1879.2838134765625
Current State,action,reward,Response time,Next State:  (7, 17.82724819986867) 0 25.0 1486.76498054 (17.229782241685768, 5)
loss 5015.37060546875
Current State,action,reward,Response time,Next State:  (5, 17.229782241685768) 3 24.0 1840.67545971 (16.84211602880065, 6)
loss 1845.8504638671875
Current State,action,reward,Response time,Next State:  (6, 16.84211602880065) 2 24.0 1511.32136729 (16.237094554670044, 6)
loss 1864.8939208984375
Current State,action,reward,Response time,Next State:  (6, 16.237094554670044) 4 22.0 1471.30578788 (15.950694610794756, 8)
loss 2651.164306640625
Current State,action,reward,Response time,Next State:  (8, 15.950694610794756) 1 23.0 1303.27855664 (15.828704162850809, 7)
loss 1030.635986328125
Current State,action,reward,Response time,Next State:  (7, 15.828704162850809) 3 22.0 1362.22224939 (15.550833128512703, 8)
loss 2618.680419921875
Current State,action,reward,Response time,Next State:  (8, 15.550833128512703) 4 20.0 1279.90873428 (15.446694946204717, 10)
loss 2144.87890625
Action +2 not possible so Scaled up by 1
Current State,action,reward,Response time,Next State:  (10, 15.446694946204717) 4 19.0 1197.01631782 (15.750501603468638, 11)
loss 875.3018188476562
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 1299.0091552734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 2437.650146484375
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 2 19.0 1204.59090422 (15.892373986997768, 11)
loss 1855.0506591796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 2477.3876953125
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 4 19.0 1211.18755114 (16.004586266677634, 11)
loss 2534.356201171875
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 1 20.0 1213.81868812 (16.017694914042416, 10)
loss 2132.72412109375
Current State,action,reward,Response time,Next State:  (10, 16.017694914042416) 0 22.0 1227.30449265 (15.947547279389703, 8)
loss 1346.3221435546875
Current State,action,reward,Response time,Next State:  (8, 15.947547279389703) 3 21.0 1303.0946115 (16.11465619633363, 9)
loss 1952.9189453125
Current State,action,reward,Response time,Next State:  (9, 16.11465619633363) 2 21.0 1266.64026605 (16.147078378791146, 9)
loss 1733.5115966796875
Current State,action,reward,Response time,Next State:  (9, 16.147078378791146) 3 20.0 1268.3374073 (16.229253414601111, 10)
loss 1177.899658203125
Action +2 not possible so Scaled up by 1
Current State,action,reward,Response time,Next State:  (10, 16.229253414601111) 4 19.0 1238.52642122 (16.295120821876548, 11)
loss 2411.0537109375
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 1 20.0 1229.17115431 (16.667936385136993, 10)
loss 2881.812744140625
Current State,action,reward,Response time,Next State:  (10, 16.667936385136993) 0 22.0 1261.79596106 (16.836383524612351, 8)
loss 2837.714599609375
Current State,action,reward,Response time,Next State:  (8, 16.836383524612351) 0 24.0 1355.04246364 (16.845818065953559, 6)
loss 2609.40966796875
Current State,action,reward,Response time,Next State:  (6, 16.845818065953559) 0 13.7638869682 1511.56621672 (17.052961248403161, 4)
loss 2283.0908203125
Current State,action,reward,Response time,Next State:  (4, 17.052961248403161) 0 -273.506388826 2122.36113032 (17.215992726625572, 2)
loss 1355.811279296875
############ Running episode number: 14  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 2 20.0 1029.06659875 (11.973514343585284, 10)
loss 2182.376708984375
Current State,action,reward,Response time,Next State:  (10, 11.973514343585284) 1 21.0 1012.7846064 (11.786394321941378, 9)
loss 2614.215576171875
Current State,action,reward,Response time,Next State:  (9, 11.786394321941378) 3 20.0 1040.0771169 (11.61852219546234, 10)
loss 1378.76611328125
Current State,action,reward,Response time,Next State:  (10, 11.61852219546234) 2 20.0 993.95437024 (11.469111876584304, 10)
loss 3387.1650390625
Current State,action,reward,Response time,Next State:  (10, 11.469111876584304) 2 20.0 986.02903554 (11.336751742492702, 10)
loss 2574.07177734375
Current State,action,reward,Response time,Next State:  (10, 11.336751742492702) 1 21.0 979.00811241 (11.25610796929319, 9)
loss 1702.5914306640625
Current State,action,reward,Response time,Next State:  (9, 11.25610796929319) 3 20.0 1012.3192433 (11.027107764209074, 10)
loss 1621.38623046875
Action +2 not possible so Scaled up by 1
Current State,action,reward,Response time,Next State:  (10, 11.027107764209074) 4 19.0 962.583328739 (10.995673623987257, 11)
loss 1391.3330078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 1639.348876953125
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 1 20.0 945.729803224 (10.816918347608043, 10)
loss 1381.2386474609375
Current State,action,reward,Response time,Next State:  (10, 10.816918347608043) 3 19.0 951.434021987 (10.819208572963639, 11)
loss 2902.32568359375
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 2 19.0 939.812260006 (10.768325938188134, 11)
loss 2954.90380859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 2843.361083984375
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 0 21.0 937.318160694 (10.644925616761762, 9)
loss 2681.8173828125
Current State,action,reward,Response time,Next State:  (9, 10.644925616761762) 4 19.0 980.32686333 (10.58735855349979, 11)
loss 867.4898681640625
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 4 19.0 927.560809977 (10.552868829802469, 11)
loss 1652.8309326171875
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 4 19.0 925.738299342 (10.553846649940214, 11)
loss 1178.8897705078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 1828.61328125
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 1 20.0 922.369964659 (10.448897752470936, 10)
loss 1434.1763916015625
Action +2 not possible so Scaled up by 1
Current State,action,reward,Response time,Next State:  (10, 10.448897752470936) 4 19.0 931.912703681 (10.433149880183072, 11)
loss 1531.95703125
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 2 19.0 919.412094444 (10.44185150623065, 11)
loss 2269.6044921875
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 1 20.0 919.871906942 (10.370942817486826, 10)
loss 3109.331298828125
Current State,action,reward,Response time,Next State:  (10, 10.370942817486826) 1 21.0 927.777654938 (10.42733414151318, 9)
loss 1703.472900390625
Current State,action,reward,Response time,Next State:  (9, 10.42733414151318) 4 19.0 968.937023414 (10.388469398680568, 11)
loss 2297.610595703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 2229.173828125
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 2 19.0 914.701547126 (10.319026962956018, 11)
loss 2319.45263671875
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 2 19.0 913.381595845 (10.30224719189987, 11)
loss 3087.072509765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 4547.36474609375
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 2 19.0 911.223233653 (10.268274366284802, 11)
loss 3348.375244140625
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 2 19.0 910.69972028 (10.335411397720526, 11)
loss 1226.8216552734375
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 1 20.0 914.247384359 (10.305649118067803, 10)
loss 2162.162353515625
Current State,action,reward,Response time,Next State:  (10, 10.305649118067803) 3 19.0 924.314209939 (10.24826025489064, 11)
loss 916.4710693359375
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 2 19.0 909.642131904 (10.276491935146446, 11)
loss 1310.21435546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 2574.870361328125
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 0 21.0 909.046654676 (10.236272697871373, 9)
loss 1609.94873046875
Current State,action,reward,Response time,Next State:  (9, 10.236272697871373) 4 19.0 958.935899728 (10.369891240151098, 11)
loss 2854.36083984375
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 0 21.0 916.069372847 (10.316955310454549, 9)
loss 1861.7789306640625
Current State,action,reward,Response time,Next State:  (9, 10.316955310454549) 3 20.0 963.159236328 (10.333617326102203, 10)
loss 1457.8565673828125
Current State,action,reward,Response time,Next State:  (10, 10.333617326102203) 0 22.0 925.797758139 (10.390165524255663, 8)
loss 2199.297607421875
Current State,action,reward,Response time,Next State:  (8, 10.390165524255663) 4 20.0 978.294574081 (10.425974763084863, 10)
loss 2303.13134765625
Current State,action,reward,Response time,Next State:  (10, 10.425974763084863) 3 19.0 930.696774523 (10.546025383098053, 11)
loss 1443.131103515625
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 1 20.0 925.376677007 (10.655373370049301, 10)
loss 2351.6044921875
Current State,action,reward,Response time,Next State:  (10, 10.655373370049301) 3 19.0 942.865015335 (10.624473674922116, 11)
loss 2518.384521484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 1166.1861572265625
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 2 19.0 937.284736847 (10.924797168745895, 11)
loss 1107.6781005859375
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 1 20.0 945.391786838 (11.039747673816453, 10)
loss 964.2684936523438
Current State,action,reward,Response time,Next State:  (10, 11.039747673816453) 2 20.0 963.253801267 (11.271571944085663, 10)
loss 3222.1875
Current State,action,reward,Response time,Next State:  (10, 11.271571944085663) 1 21.0 975.550709187 (11.670334358779868, 9)
loss 4079.9970703125
Current State,action,reward,Response time,Next State:  (9, 11.670334358779868) 3 20.0 1034.00195058 (11.819721938468785, 10)
loss 2194.816162109375
Current State,action,reward,Response time,Next State:  (10, 11.819721938468785) 3 19.0 1004.62682792 (12.19918626616789, 11)
loss 2852.54541015625
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 4 19.0 1012.73322757 (12.501496275411796, 11)
loss 1226.3172607421875
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 1 20.0 1028.70793389 (13.168618569876575, 10)
loss 1468.3558349609375
Action +2 not possible so Scaled up by 1
Current State,action,reward,Response time,Next State:  (10, 13.168618569876575) 4 19.0 1076.17782493 (13.649658108197247, 11)
loss 802.02099609375
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 4 19.0 1089.37925646 (14.283719188889453, 11)
loss 1866.0126953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 1589.7877197265625
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 2 19.0 1143.69153516 (15.353965082180355, 11)
loss 1626.9947509765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 3044.810302734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 2135.209228515625
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 2 19.0 1238.24711194 (16.871606159345866, 11)
loss 1362.7220458984375
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 4 19.0 1259.63387034 (17.534967586021782, 11)
loss 1667.0863037109375
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 4 19.0 1294.6873044 (17.669285735563751, 11)
loss 514.8563842773438
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 0 21.0 1301.78496219 (17.944480812078613, 9)
loss 4358.29150390625
Current State,action,reward,Response time,Next State:  (9, 17.944480812078613) 4 19.0 1362.4225545 (18.385807405229915, 11)
loss 884.0311279296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 3506.187744140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 1911.1597900390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 2527.420166015625
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 1 20.0 1387.23260634 (19.340464848017284, 10)
loss 1567.170654296875
Current State,action,reward,Response time,Next State:  (10, 19.340464848017284) 0 22.0 1403.55780672 (19.213467265587269, 8)
loss 1567.0440673828125
Current State,action,reward,Response time,Next State:  (8, 19.213467265587269) 2 22.0 1493.97063558 (19.140765783401285, 8)
loss 873.2409057617188
Current State,action,reward,Response time,Next State:  (8, 19.140765783401285) 3 21.0 1489.72161235 (19.385636054792762, 9)
loss 1433.15185546875
Current State,action,reward,Response time,Next State:  (9, 19.385636054792762) 1 22.0 1437.85991935 (19.223969507401588, 8)
loss 1598.6522216796875
Current State,action,reward,Response time,Next State:  (8, 19.223969507401588) 3 21.0 1494.58443695 (19.25591252280865, 9)
loss 905.0661010742188
Current State,action,reward,Response time,Next State:  (9, 19.25591252280865) 0 23.0 1431.06953264 (19.08360399753829, 7)
loss 1277.641845703125
Current State,action,reward,Response time,Next State:  (7, 19.08360399753829) 2 23.0 1565.05696683 (18.668181536495972, 7)
loss 1544.2537841796875
Current State,action,reward,Response time,Next State:  (7, 18.668181536495972) 4 21.0 1539.16919707 (18.375894992990247, 9)
loss 2103.119140625
Current State,action,reward,Response time,Next State:  (9, 18.375894992990247) 0 23.0 1385.00495784 (17.82724819986867, 7)
loss 1343.22802734375
Current State,action,reward,Response time,Next State:  (7, 17.82724819986867) 0 25.0 1486.76498054 (17.229782241685768, 5)
loss 1257.8330078125
Current State,action,reward,Response time,Next State:  (5, 17.229782241685768) 3 24.0 1840.67545971 (16.84211602880065, 6)
loss 1818.641845703125
Current State,action,reward,Response time,Next State:  (6, 16.84211602880065) 4 22.0 1511.32136729 (16.237094554670044, 8)
loss 2695.117919921875
Current State,action,reward,Response time,Next State:  (8, 16.237094554670044) 0 24.0 1320.01714264 (15.950694610794756, 6)
loss 2582.518310546875
Current State,action,reward,Response time,Next State:  (6, 15.950694610794756) 3 23.0 1452.36355164 (15.828704162850809, 7)
loss 959.4090576171875
Current State,action,reward,Response time,Next State:  (7, 15.828704162850809) 3 22.0 1362.22224939 (15.550833128512703, 8)
loss 1388.80517578125
Current State,action,reward,Response time,Next State:  (8, 15.550833128512703) 4 20.0 1279.90873428 (15.446694946204717, 10)
loss 954.6783447265625
Current State,action,reward,Response time,Next State:  (10, 15.446694946204717) 2 20.0 1197.01631782 (15.750501603468638, 10)
loss 1220.739501953125
Current State,action,reward,Response time,Next State:  (10, 15.750501603468638) 3 19.0 1213.1314661 (15.817158911312735, 11)
loss 2016.952880859375
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 1 20.0 1203.91462651 (15.829956988360925, 10)
loss 3591.685791015625
Current State,action,reward,Response time,Next State:  (10, 15.829956988360925) 1 21.0 1217.34610485 (15.892373986997768, 9)
loss 2272.166259765625
Current State,action,reward,Response time,Next State:  (9, 15.892373986997768) 2 21.0 1255.00488935 (15.954793861767499, 9)
loss 1188.9451904296875
Current State,action,reward,Response time,Next State:  (9, 15.954793861767499) 0 23.0 1258.27226176 (16.004586266677634, 7)
loss 2081.888916015625
Current State,action,reward,Response time,Next State:  (7, 16.004586266677634) 1 24.0 1373.18264715 (16.017694914042416, 6)
loss 2077.92333984375
Current State,action,reward,Response time,Next State:  (6, 16.017694914042416) 3 23.0 1456.7948918 (15.947547279389703, 7)
loss 3811.97119140625
Current State,action,reward,Response time,Next State:  (7, 15.947547279389703) 2 23.0 1369.62816392 (16.11465619633363, 7)
loss 2463.21484375
Current State,action,reward,Response time,Next State:  (7, 16.11465619633363) 3 22.0 1380.04184534 (16.147078378791146, 8)
loss 2193.333740234375
Current State,action,reward,Response time,Next State:  (8, 16.147078378791146) 0 24.0 1314.7561657 (16.229253414601111, 6)
loss 2415.60986328125
Current State,action,reward,Response time,Next State:  (6, 16.229253414601111) 3 23.0 1470.78718189 (16.295120821876548, 7)
loss 1329.9224853515625
Current State,action,reward,Response time,Next State:  (7, 16.295120821876548) 2 23.0 1391.28781087 (16.667936385136993, 7)
loss 3321.3759765625
Current State,action,reward,Response time,Next State:  (7, 16.667936385136993) 1 24.0 1414.52045804 (16.836383524612351, 6)
loss 2402.388427734375
Current State,action,reward,Response time,Next State:  (6, 16.836383524612351) 3 23.0 1510.94222459 (16.845818065953559, 7)
loss 2306.95751953125
Current State,action,reward,Response time,Next State:  (7, 16.845818065953559) 1 24.0 1425.60546289 (17.052961248403161, 6)
loss 3621.44873046875
Current State,action,reward,Response time,Next State:  (6, 17.052961248403161) 1 25.0 1525.26648162 (17.215992726625572, 5)
loss 2359.73291015625
############ Running episode number: 15  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 0 22.0 1029.06659875 (11.973514343585284, 8)
loss 4824.21533203125
Current State,action,reward,Response time,Next State:  (8, 11.973514343585284) 0 24.0 1070.83307124 (11.786394321941378, 6)
loss 1545.7852783203125
Current State,action,reward,Response time,Next State:  (6, 11.786394321941378) 3 23.0 1176.94045342 (11.61852219546234, 7)
loss 1276.0889892578125
Current State,action,reward,Response time,Next State:  (7, 11.61852219546234) 3 22.0 1099.85747227 (11.469111876584304, 8)
loss 2458.23974609375
Current State,action,reward,Response time,Next State:  (8, 11.469111876584304) 2 22.0 1041.35337246 (11.336751742492702, 8)
loss 1091.3387451171875
Current State,action,reward,Response time,Next State:  (8, 11.336751742492702) 2 22.0 1033.61761156 (11.25610796929319, 8)
loss 1547.040771484375
Current State,action,reward,Response time,Next State:  (8, 11.25610796929319) 4 20.0 1028.90440276 (11.027107764209074, 10)
loss 2057.244873046875
Current State,action,reward,Response time,Next State:  (10, 11.027107764209074) 0 22.0 962.583328739 (10.995673623987257, 8)
loss 746.71533203125
Current State,action,reward,Response time,Next State:  (8, 10.995673623987257) 0 24.0 1013.68337084 (10.931193889570471, 6)
loss 1666.168701171875
Current State,action,reward,Response time,Next State:  (6, 10.931193889570471) 1 25.0 1120.37826137 (10.816918347608043, 5)
loss 1238.4403076171875
Current State,action,reward,Response time,Next State:  (5, 10.816918347608043) 4 23.0 1251.44728215 (10.819208572963639, 7)
loss 3769.08837890625
Current State,action,reward,Response time,Next State:  (7, 10.819208572963639) 2 23.0 1050.04686027 (10.768325938188134, 7)
loss 2493.19921875
Current State,action,reward,Response time,Next State:  (7, 10.768325938188134) 3 22.0 1046.87602081 (10.772009508959538, 8)
loss 1324.574951171875
Current State,action,reward,Response time,Next State:  (8, 10.772009508959538) 4 20.0 1000.61136749 (10.644925616761762, 10)
loss 1719.5693359375
Current State,action,reward,Response time,Next State:  (10, 10.644925616761762) 0 22.0 942.310823749 (10.58735855349979, 8)
loss 885.6466674804688
Current State,action,reward,Response time,Next State:  (8, 10.58735855349979) 3 21.0 989.819480251 (10.552868829802469, 9)
loss 1919.558349609375
Current State,action,reward,Response time,Next State:  (9, 10.552868829802469) 0 23.0 975.508144832 (10.553846649940214, 7)
loss 825.4545288085938
Current State,action,reward,Response time,Next State:  (7, 10.553846649940214) 3 22.0 1033.5103727 (10.489125480251131, 8)
loss 2847.684326171875
Current State,action,reward,Response time,Next State:  (8, 10.489125480251131) 4 20.0 984.078268423 (10.448897752470936, 10)
loss 1056.59814453125
Action +2 not possible so Scaled up by 1
Current State,action,reward,Response time,Next State:  (10, 10.448897752470936) 4 19.0 931.912703681 (10.433149880183072, 11)
loss 2601.366943359375
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 0 21.0 919.412094444 (10.44185150623065, 9)
loss 2952.993408203125
Current State,action,reward,Response time,Next State:  (9, 10.44185150623065) 4 19.0 969.696935814 (10.370942817486826, 11)
loss 1715.2674560546875
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 0 21.0 916.124940439 (10.42733414151318, 9)
loss 1571.7183837890625
Current State,action,reward,Response time,Next State:  (9, 10.42733414151318) 4 19.0 968.937023414 (10.388469398680568, 11)
loss 2469.345703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 3147.23095703125
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 1 20.0 914.701547126 (10.319026962956018, 10)
loss 1346.163330078125
Current State,action,reward,Response time,Next State:  (10, 10.319026962956018) 3 19.0 925.023825574 (10.30224719189987, 11)
loss 2903.237548828125
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 0 21.0 912.494916918 (10.278181486298042, 9)
loss 702.6995849609375
Current State,action,reward,Response time,Next State:  (9, 10.278181486298042) 2 21.0 961.129617982 (10.268274366284802, 9)
loss 2188.857421875
Current State,action,reward,Response time,Next State:  (9, 10.268274366284802) 3 20.0 960.611029141 (10.335411397720526, 10)
loss 1115.3526611328125
Action +2 not possible so Scaled up by 1
Current State,action,reward,Response time,Next State:  (10, 10.335411397720526) 4 19.0 925.892923039 (10.305649118067803, 11)
loss 541.1708984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 3692.75732421875
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 2 19.0 909.642131904 (10.276491935146446, 11)
loss 1674.1568603515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 1985.9888916015625
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 0 21.0 909.046654676 (10.236272697871373, 9)
loss 880.3755493164062
Current State,action,reward,Response time,Next State:  (9, 10.236272697871373) 3 20.0 958.935899728 (10.369891240151098, 10)
loss 1014.4877319335938
Current State,action,reward,Response time,Next State:  (10, 10.369891240151098) 3 19.0 927.721874973 (10.316955310454549, 11)
loss 1944.356689453125
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 1 20.0 913.272125304 (10.333617326102203, 10)
loss 1467.500732421875
Current State,action,reward,Response time,Next State:  (10, 10.333617326102203) 3 19.0 925.797758139 (10.390165524255663, 11)
loss 1130.2777099609375
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 1 20.0 917.140709305 (10.425974763084863, 10)
loss 2296.86572265625
Current State,action,reward,Response time,Next State:  (10, 10.425974763084863) 3 19.0 930.696774523 (10.546025383098053, 11)
loss 2653.201171875
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 2 19.0 925.376677007 (10.655373370049301, 11)
loss 2843.130615234375
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 2 19.0 931.154858096 (10.624473674922116, 11)
loss 1840.6102294921875
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 4 19.0 929.522052234 (10.771376986314287, 11)
loss 2224.5498046875
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 0 21.0 937.284736847 (10.924797168745895, 9)
loss 794.3472900390625
Current State,action,reward,Response time,Next State:  (9, 10.924797168745895) 4 19.0 994.97675791 (11.039747673816453, 11)
loss 1044.8162841796875
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 1 20.0 951.466016946 (11.271571944085663, 10)
loss 1450.9132080078125
Action +2 not possible so Scaled up by 1
Current State,action,reward,Response time,Next State:  (10, 11.271571944085663) 4 19.0 975.550709187 (11.670334358779868, 11)
loss 1426.8531494140625
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 2 19.0 984.787563682 (11.819721938468785, 11)
loss 2822.539306640625
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 2 19.0 992.681522335 (12.19918626616789, 11)
loss 1672.550537109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 1933.4381103515625
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 0 21.0 1028.70793389 (13.168618569876575, 9)
loss 1494.96142578125
Current State,action,reward,Response time,Next State:  (9, 13.168618569876575) 0 23.0 1112.429735 (13.649658108197247, 7)
loss 735.7217407226562
Current State,action,reward,Response time,Next State:  (7, 13.649658108197247) 3 22.0 1226.43122257 (14.283719188889453, 8)
loss 1501.9854736328125
Current State,action,reward,Response time,Next State:  (8, 14.283719188889453) 3 21.0 1205.85251983 (14.677479537099185, 9)
loss 798.82373046875
Current State,action,reward,Response time,Next State:  (9, 14.677479537099185) 3 20.0 1191.41116041 (15.353965082180355, 10)
loss 1333.6807861328125
Current State,action,reward,Response time,Next State:  (10, 15.353965082180355) 2 20.0 1192.09754638 (15.836943704090487, 10)
loss 2054.411865234375
Current State,action,reward,Response time,Next State:  (10, 15.836943704090487) 3 19.0 1217.71670884 (16.466876895473597, 11)
loss 2299.050537109375
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 1 20.0 1238.24711194 (16.871606159345866, 10)
loss 1467.27734375
Current State,action,reward,Response time,Next State:  (10, 16.871606159345866) 2 20.0 1272.5994393 (17.534967586021782, 10)
loss 2013.4261474609375
Current State,action,reward,Response time,Next State:  (10, 17.534967586021782) 0 22.0 1307.78684385 (17.669285735563751, 8)
loss 1460.8968505859375
Current State,action,reward,Response time,Next State:  (8, 17.669285735563751) 0 24.0 1403.72126261 (17.944480812078613, 6)
loss 792.5138549804688
Current State,action,reward,Response time,Next State:  (6, 17.944480812078613) 2 24.0 1584.23078855 (18.385807405229915, 6)
loss 3249.32763671875
Current State,action,reward,Response time,Next State:  (6, 18.385807405229915) 1 25.0 1613.41973487 (18.671267839956315, 5)
loss 1275.9317626953125
Current State,action,reward,Response time,Next State:  (5, 18.671267839956315) 1 -8.67790698763 1973.12235802 (19.02839494033929, 4)
loss 2814.989990234375
Current State,action,reward,Response time,Next State:  (4, 19.02839494033929) 4 24.0 2346.77906988 (19.286321916040979, 6)
loss 1232.2979736328125
Current State,action,reward,Response time,Next State:  (6, 19.286321916040979) 3 23.0 1672.97895956 (19.340464848017284, 7)
loss 2564.69189453125
Current State,action,reward,Response time,Next State:  (7, 19.340464848017284) 3 22.0 1581.06369535 (19.213467265587269, 8)
loss 746.3856811523438
Current State,action,reward,Response time,Next State:  (8, 19.213467265587269) 3 21.0 1493.97063558 (19.140765783401285, 9)
loss 1854.0333251953125
Current State,action,reward,Response time,Next State:  (9, 19.140765783401285) 1 22.0 1425.04216908 (19.385636054792762, 8)
loss 1386.0909423828125
Current State,action,reward,Response time,Next State:  (8, 19.385636054792762) 4 20.0 1504.03300517 (19.223969507401588, 10)
loss 2605.43212890625
Current State,action,reward,Response time,Next State:  (10, 19.223969507401588) 3 19.0 1397.37841716 (19.25591252280865, 11)
loss 1446.8145751953125
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 0 21.0 1385.62570908 (19.08360399753829, 9)
loss 533.9893798828125
Current State,action,reward,Response time,Next State:  (9, 19.08360399753829) 0 23.0 1422.05003169 (18.668181536495972, 7)
loss 678.6256103515625
Current State,action,reward,Response time,Next State:  (7, 18.668181536495972) 3 22.0 1539.16919707 (18.375894992990247, 8)
loss 1101.9412841796875
Current State,action,reward,Response time,Next State:  (8, 18.375894992990247) 4 20.0 1445.01889581 (17.82724819986867, 10)
loss 959.68994140625
Current State,action,reward,Response time,Next State:  (10, 17.82724819986867) 2 20.0 1323.29060362 (17.229782241685768, 10)
loss 1991.9754638671875
Current State,action,reward,Response time,Next State:  (10, 17.229782241685768) 1 21.0 1291.59856437 (16.84211602880065, 9)
loss 3109.637939453125
Current State,action,reward,Response time,Next State:  (9, 16.84211602880065) 3 20.0 1304.71919827 (16.237094554670044, 10)
loss 1964.13037109375
Current State,action,reward,Response time,Next State:  (10, 16.237094554670044) 3 19.0 1238.94234737 (15.950694610794756, 11)
loss 3272.38525390625
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 4 19.0 1210.97093797 (15.828704162850809, 11)
loss 3069.955078125
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 1 20.0 1204.52470225 (15.550833128512703, 10)
loss 1574.975341796875
Current State,action,reward,Response time,Next State:  (10, 15.550833128512703) 3 19.0 1202.54023315 (15.446694946204717, 11)
loss 2089.664306640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 1812.6671142578125
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 4 19.0 1200.39231205 (15.817158911312735, 11)
loss 2504.036865234375
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 0 21.0 1203.91462651 (15.829956988360925, 9)
loss 1472.828857421875
Current State,action,reward,Response time,Next State:  (9, 15.829956988360925) 3 20.0 1251.7376675 (15.892373986997768, 10)
loss 888.14599609375
Current State,action,reward,Response time,Next State:  (10, 15.892373986997768) 3 19.0 1220.65695786 (15.954793861767499, 11)
loss 2285.56103515625
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 2 19.0 1211.18755114 (16.004586266677634, 11)
loss 1992.2890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 1765.695068359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 752.2453002929688
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 2 19.0 1210.80462626 (16.11465619633363, 11)
loss 2086.01025390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 633.3427124023438
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 0 21.0 1221.34827555 (16.229253414601111, 9)
loss 1726.694580078125
Current State,action,reward,Response time,Next State:  (9, 16.229253414601111) 1 22.0 1272.63886489 (16.295120821876548, 8)
loss 1708.9195556640625
Current State,action,reward,Response time,Next State:  (8, 16.295120821876548) 0 24.0 1323.40847593 (16.667936385136993, 6)
loss 2937.73486328125
Current State,action,reward,Response time,Next State:  (6, 16.667936385136993) 3 23.0 1499.80128138 (16.836383524612351, 7)
loss 2818.562744140625
Current State,action,reward,Response time,Next State:  (7, 16.836383524612351) 4 21.0 1425.01753312 (16.845818065953559, 9)
loss 1539.4326171875
Current State,action,reward,Response time,Next State:  (9, 16.845818065953559) 2 21.0 1304.91298164 (17.052961248403161, 9)
loss 1371.8912353515625
Current State,action,reward,Response time,Next State:  (9, 17.052961248403161) 3 20.0 1315.75590499 (17.215992726625572, 10)
loss 2503.904052734375
############ Running episode number: 16  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 0 22.0 1029.06659875 (11.973514343585284, 8)
loss 2209.53466796875
Current State,action,reward,Response time,Next State:  (8, 11.973514343585284) 3 21.0 1070.83307124 (11.786394321941378, 9)
loss 995.4188232421875
Current State,action,reward,Response time,Next State:  (9, 11.786394321941378) 3 20.0 1040.0771169 (11.61852219546234, 10)
loss 705.2335815429688
Current State,action,reward,Response time,Next State:  (10, 11.61852219546234) 3 19.0 993.95437024 (11.469111876584304, 11)
loss 2014.4393310546875
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 0 21.0 974.154538113 (11.336751742492702, 9)
loss 1855.171630859375
Current State,action,reward,Response time,Next State:  (9, 11.336751742492702) 3 20.0 1016.54054685 (11.25610796929319, 10)
loss 1848.036376953125
Current State,action,reward,Response time,Next State:  (10, 11.25610796929319) 0 22.0 974.730436685 (11.027107764209074, 8)
loss 789.4197387695312
Current State,action,reward,Response time,Next State:  (8, 11.027107764209074) 1 23.0 1015.52053272 (10.995673623987257, 7)
loss 1391.4708251953125
Current State,action,reward,Response time,Next State:  (7, 10.995673623987257) 0 25.0 1061.04358539 (10.931193889570471, 5)
loss 826.4140014648438
Current State,action,reward,Response time,Next State:  (5, 10.931193889570471) 3 24.0 1261.94717276 (10.816918347608043, 6)
loss 1157.85302734375
Current State,action,reward,Response time,Next State:  (6, 10.816918347608043) 3 23.0 1112.82017919 (10.819208572963639, 7)
loss 3147.890869140625
Current State,action,reward,Response time,Next State:  (7, 10.819208572963639) 1 24.0 1050.04686027 (10.768325938188134, 6)
loss 1597.2615966796875
Current State,action,reward,Response time,Next State:  (6, 10.768325938188134) 3 23.0 1109.60632067 (10.772009508959538, 7)
loss 1250.2799072265625
Current State,action,reward,Response time,Next State:  (7, 10.772009508959538) 0 25.0 1047.1055689 (10.644925616761762, 5)
loss 1751.9156494140625
Current State,action,reward,Response time,Next State:  (5, 10.644925616761762) 0 27.0 1235.64420805 (10.58735855349979, 3)
loss 3789.426025390625
Action -2 not possible so Scaled down by 1
Current State,action,reward,Response time,Next State:  (3, 10.58735855349979) 0 -66.2945562827 1982.65214417 (10.552868829802469, 2)
loss 3018.129638671875
Action -2 not possible so Scaled down by 0
Current State,action,reward,Response time,Next State:  (2, 10.552868829802469) 0 -66.3249648277 2942.94556283 (10.553846649940214, 2)
loss 2027.391357421875
Action -2 not possible so Scaled down by 0
Current State,action,reward,Response time,Next State:  (2, 10.553846649940214) 1 -64.3122464116 2943.24964828 (10.489125480251131, 2)
loss 2017.79931640625
Current State,action,reward,Response time,Next State:  (2, 10.489125480251131) 3 27.0 2923.12246412 (10.448897752470936, 3)
loss 3643.781494140625
Current State,action,reward,Response time,Next State:  (3, 10.448897752470936) 2 27.0 1955.42088812 (10.433149880183072, 3)
loss 1640.967529296875
Current State,action,reward,Response time,Next State:  (3, 10.433149880183072) 2 27.0 1952.32373471 (10.44185150623065, 3)
loss 617.278564453125
Current State,action,reward,Response time,Next State:  (3, 10.44185150623065) 2 27.0 1954.03509421 (10.370942817486826, 3)
loss 1126.516845703125
Current State,action,reward,Response time,Next State:  (3, 10.370942817486826) 3 26.0 1940.08939517 (10.42733414151318, 4)
loss 1746.005859375
Current State,action,reward,Response time,Next State:  (4, 10.42733414151318) 0 -61.1820132895 1369.66080447 (10.388469398680568, 2)
loss 1093.7283935546875
Action -2 not possible so Scaled down by 0
Current State,action,reward,Response time,Next State:  (2, 10.388469398680568) 1 -59.799280449 2891.82013289 (10.344006106602812, 2)
loss 1319.8233642578125
Current State,action,reward,Response time,Next State:  (2, 10.344006106602812) 4 26.0 2877.99280449 (10.319026962956018, 4)
loss 3573.167236328125
Current State,action,reward,Response time,Next State:  (4, 10.319026962956018) 3 25.0 1357.35663352 (10.30224719189987, 5)
loss 2728.292724609375
Current State,action,reward,Response time,Next State:  (5, 10.30224719189987) 4 23.0 1204.15815328 (10.278181486298042, 7)
loss 781.3466796875
Current State,action,reward,Response time,Next State:  (7, 10.278181486298042) 3 22.0 1016.33182085 (10.268274366284802, 8)
loss 1784.716796875
Current State,action,reward,Response time,Next State:  (8, 10.268274366284802) 4 20.0 971.170670341 (10.335411397720526, 10)
loss 2549.248046875
Current State,action,reward,Response time,Next State:  (10, 10.335411397720526) 0 22.0 925.892923039 (10.305649118067803, 8)
loss 2315.061767578125
Current State,action,reward,Response time,Next State:  (8, 10.305649118067803) 0 24.0 973.355030047 (10.24826025489064, 6)
loss 2275.669677734375
Current State,action,reward,Response time,Next State:  (6, 10.24826025489064) 4 22.0 1075.20964131 (10.276491935146446, 8)
loss 1955.8223876953125
Current State,action,reward,Response time,Next State:  (8, 10.276491935146446) 3 21.0 971.650944469 (10.236991269871366, 9)
loss 1202.072509765625
Current State,action,reward,Response time,Next State:  (9, 10.236991269871366) 2 21.0 958.973513426 (10.236272697871373, 9)
loss 2197.569580078125
Current State,action,reward,Response time,Next State:  (9, 10.236272697871373) 3 20.0 958.935899728 (10.369891240151098, 10)
loss 2746.110107421875
Action +2 not possible so Scaled up by 1
Current State,action,reward,Response time,Next State:  (10, 10.369891240151098) 4 19.0 927.721874973 (10.316955310454549, 11)
loss 2742.114013671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 2083.553466796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 845.4288940429688
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 2 19.0 917.140709305 (10.425974763084863, 11)
loss 2043.25634765625
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 1 20.0 919.032945938 (10.546025383098053, 10)
loss 2612.8623046875
Current State,action,reward,Response time,Next State:  (10, 10.546025383098053) 3 19.0 937.064750655 (10.655373370049301, 11)
loss 1116.736083984375
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 4 19.0 931.154858096 (10.624473674922116, 11)
loss 1105.5206298828125
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 2 19.0 929.522052234 (10.771376986314287, 11)
loss 695.4706420898438
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 1306.451171875
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 1 20.0 945.391786838 (11.039747673816453, 10)
loss 2140.049072265625
Current State,action,reward,Response time,Next State:  (10, 11.039747673816453) 3 19.0 963.253801267 (11.271571944085663, 11)
loss 2016.7034912109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 791.8086547851562
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 2923.24951171875
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 0 21.0 992.681522335 (12.19918626616789, 9)
loss 770.6015014648438
Current State,action,reward,Response time,Next State:  (9, 12.19918626616789) 2 21.0 1061.68473805 (12.501496275411796, 9)
loss 2331.487548828125
Current State,action,reward,Response time,Next State:  (9, 12.501496275411796) 2 21.0 1077.50917513 (13.168618569876575, 9)
loss 2186.128662109375
Current State,action,reward,Response time,Next State:  (9, 13.168618569876575) 3 20.0 1112.429735 (13.649658108197247, 10)
loss 1573.2186279296875
Current State,action,reward,Response time,Next State:  (10, 13.649658108197247) 1 21.0 1101.69413046 (14.283719188889453, 9)
loss 932.4458618164062
Current State,action,reward,Response time,Next State:  (9, 14.283719188889453) 1 22.0 1170.79974938 (14.677479537099185, 8)
loss 1395.8388671875
Current State,action,reward,Response time,Next State:  (8, 14.677479537099185) 3 21.0 1228.86576266 (15.353965082180355, 9)
loss 2505.82373046875
Current State,action,reward,Response time,Next State:  (9, 15.353965082180355) 3 20.0 1226.82184023 (15.836943704090487, 10)
loss 2613.867431640625
Current State,action,reward,Response time,Next State:  (10, 15.836943704090487) 2 20.0 1217.71670884 (16.466876895473597, 10)
loss 768.173095703125
Current State,action,reward,Response time,Next State:  (10, 16.466876895473597) 3 19.0 1251.130943 (16.871606159345866, 11)
loss 1958.9189453125
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 1 20.0 1259.63387034 (17.534967586021782, 10)
loss 1470.517333984375
Current State,action,reward,Response time,Next State:  (10, 17.534967586021782) 3 19.0 1307.78684385 (17.669285735563751, 11)
loss 781.8617553710938
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 0 21.0 1301.78496219 (17.944480812078613, 9)
loss 2238.084716796875
Current State,action,reward,Response time,Next State:  (9, 17.944480812078613) 3 20.0 1362.4225545 (18.385807405229915, 10)
loss 1573.9964599609375
Current State,action,reward,Response time,Next State:  (10, 18.385807405229915) 2 20.0 1352.9188695 (18.671267839956315, 10)
loss 1507.5380859375
Current State,action,reward,Response time,Next State:  (10, 18.671267839956315) 3 19.0 1368.06085906 (19.02839494033929, 11)
loss 1411.676025390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 1164.5792236328125
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 0 21.0 1387.23260634 (19.340464848017284, 9)
loss 994.2630615234375
Current State,action,reward,Response time,Next State:  (9, 19.340464848017284) 4 19.0 1435.4954296 (19.213467265587269, 11)
loss 1789.603271484375
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 4 19.0 1383.38281107 (19.140765783401285, 11)
loss 2422.670166015625
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 2 19.0 1379.54110953 (19.385636054792762, 11)
loss 1354.0435791015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 2912.41796875
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 4 19.0 1383.93777195 (19.25591252280865, 11)
loss 2772.81494140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 1690.416259765625
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 0 21.0 1376.52055872 (18.668181536495972, 9)
loss 1256.7811279296875
Current State,action,reward,Response time,Next State:  (9, 18.668181536495972) 4 19.0 1400.30471596 (18.375894992990247, 11)
loss 719.525390625
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 0 21.0 1339.12370397 (17.82724819986867, 9)
loss 1978.345703125
Current State,action,reward,Response time,Next State:  (9, 17.82724819986867) 4 19.0 1356.28600579 (17.229782241685768, 11)
loss 3246.625732421875
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 1 20.0 1278.56065924 (16.84211602880065, 10)
loss 1867.33349609375
Current State,action,reward,Response time,Next State:  (10, 16.84211602880065) 2 20.0 1271.03516211 (16.237094554670044, 10)
loss 2646.974853515625
Current State,action,reward,Response time,Next State:  (10, 16.237094554670044) 3 19.0 1238.94234737 (15.950694610794756, 11)
loss 734.7877197265625
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 2 19.0 1210.97093797 (15.828704162850809, 11)
loss 2646.0849609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 2767.70556640625
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 0 21.0 1189.84140354 (15.446694946204717, 9)
loss 1434.700439453125
Current State,action,reward,Response time,Next State:  (9, 15.446694946204717) 2 21.0 1231.67579099 (15.750501603468638, 9)
loss 2267.142333984375
Current State,action,reward,Response time,Next State:  (9, 15.750501603468638) 1 22.0 1247.57857022 (15.817158911312735, 8)
loss 1312.3466796875
Current State,action,reward,Response time,Next State:  (8, 15.817158911312735) 3 21.0 1295.47409005 (15.829956988360925, 9)
loss 1511.904052734375
Current State,action,reward,Response time,Next State:  (9, 15.829956988360925) 3 20.0 1251.7376675 (15.892373986997768, 10)
loss 2453.153564453125
Action +2 not possible so Scaled up by 1
Current State,action,reward,Response time,Next State:  (10, 15.892373986997768) 4 19.0 1220.65695786 (15.954793861767499, 11)
loss 1476.0439453125
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 1 20.0 1211.18755114 (16.004586266677634, 10)
loss 1893.2470703125
Current State,action,reward,Response time,Next State:  (10, 16.004586266677634) 3 19.0 1226.60915635 (16.017694914042416, 11)
loss 1198.2159423828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 1635.03173828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 1292.9078369140625
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 1 20.0 1219.63501821 (16.147078378791146, 10)
loss 2203.009765625
Current State,action,reward,Response time,Next State:  (10, 16.147078378791146) 3 19.0 1234.16752106 (16.229253414601111, 11)
loss 1651.830322265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 655.4970092773438
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 1524.2164306640625
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 4 19.0 1248.87152463 (16.836383524612351, 11)
loss 1193.1627197265625
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 4 19.0 1257.77263112 (16.845818065953559, 11)
loss 857.858642578125
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 2 19.0 1258.27117243 (17.052961248403161, 11)
loss 691.0767822265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 1579.85595703125
############ Running episode number: 17  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 703.9909057617188
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 0 21.0 1000.80824137 (11.786394321941378, 9)
loss 534.479736328125
Current State,action,reward,Response time,Next State:  (9, 11.786394321941378) 4 19.0 1040.0771169 (11.61852219546234, 11)
loss 2004.0450439453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 1781.65576171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 1563.0869140625
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 0 21.0 967.160346038 (11.25610796929319, 9)
loss 1432.921142578125
Current State,action,reward,Response time,Next State:  (9, 11.25610796929319) 3 20.0 1012.3192433 (11.027107764209074, 10)
loss 1538.04833984375
Current State,action,reward,Response time,Next State:  (10, 11.027107764209074) 0 22.0 962.583328739 (10.995673623987257, 8)
loss 2676.990478515625
Current State,action,reward,Response time,Next State:  (8, 10.995673623987257) 2 22.0 1013.68337084 (10.931193889570471, 8)
loss 1256.537353515625
Current State,action,reward,Response time,Next State:  (8, 10.931193889570471) 3 21.0 1009.91486598 (10.816918347608043, 9)
loss 2438.2060546875
Current State,action,reward,Response time,Next State:  (9, 10.816918347608043) 2 21.0 989.329834005 (10.819208572963639, 9)
loss 1231.305908203125
Current State,action,reward,Response time,Next State:  (9, 10.819208572963639) 3 20.0 989.449716 (10.768325938188134, 10)
loss 1255.8759765625
Current State,action,reward,Response time,Next State:  (10, 10.768325938188134) 2 20.0 948.856481751 (10.772009508959538, 10)
loss 1432.7935791015625
Action +2 not possible so Scaled up by 1
Current State,action,reward,Response time,Next State:  (10, 10.772009508959538) 4 19.0 949.051873418 (10.644925616761762, 11)
loss 1848.78662109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 1266.10498046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 2020.877685546875
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 0 21.0 925.738299342 (10.553846649940214, 9)
loss 2399.57177734375
Current State,action,reward,Response time,Next State:  (9, 10.553846649940214) 3 20.0 975.559328891 (10.489125480251131, 10)
loss 1006.0155639648438
Current State,action,reward,Response time,Next State:  (10, 10.489125480251131) 2 20.0 934.046546974 (10.448897752470936, 10)
loss 1101.9083251953125
Current State,action,reward,Response time,Next State:  (10, 10.448897752470936) 1 21.0 931.912703681 (10.433149880183072, 9)
loss 714.2459106445312
Current State,action,reward,Response time,Next State:  (9, 10.433149880183072) 4 19.0 969.241448633 (10.44185150623065, 11)
loss 789.3889770507812
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 0 21.0 919.871906942 (10.370942817486826, 9)
loss 554.9044799804688
Current State,action,reward,Response time,Next State:  (9, 10.370942817486826) 3 20.0 965.985215893 (10.42733414151318, 10)
loss 792.0089111328125
Current State,action,reward,Response time,Next State:  (10, 10.42733414151318) 2 20.0 930.768881517 (10.388469398680568, 10)
loss 857.32275390625
Action +2 not possible so Scaled up by 1
Current State,action,reward,Response time,Next State:  (10, 10.388469398680568) 4 19.0 928.707336523 (10.344006106602812, 11)
loss 737.248046875
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 1 20.0 914.701547126 (10.319026962956018, 10)
loss 1650.183349609375
Current State,action,reward,Response time,Next State:  (10, 10.319026962956018) 0 22.0 925.023825574 (10.30224719189987, 8)
loss 609.4220581054688
Current State,action,reward,Response time,Next State:  (8, 10.30224719189987) 2 22.0 973.15620517 (10.278181486298042, 8)
loss 2301.03515625
Current State,action,reward,Response time,Next State:  (8, 10.278181486298042) 3 21.0 971.749689939 (10.268274366284802, 9)
loss 1192.6927490234375
Current State,action,reward,Response time,Next State:  (9, 10.268274366284802) 4 19.0 960.611029141 (10.335411397720526, 11)
loss 2411.042236328125
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 4 19.0 914.247384359 (10.305649118067803, 11)
loss 997.4201049804688
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 0 21.0 912.67468196 (10.24826025489064, 9)
loss 722.7095947265625
Current State,action,reward,Response time,Next State:  (9, 10.24826025489064) 0 23.0 959.563389179 (10.276491935146446, 7)
loss 1610.0352783203125
Current State,action,reward,Response time,Next State:  (7, 10.276491935146446) 0 25.0 1016.22653355 (10.236991269871366, 5)
loss 970.1837158203125
Current State,action,reward,Response time,Next State:  (5, 10.236991269871366) 3 24.0 1198.16229423 (10.236272697871373, 6)
loss 1003.3978271484375
Current State,action,reward,Response time,Next State:  (6, 10.236272697871373) 0 26.0 1074.41679501 (10.369891240151098, 4)
loss 1278.4541015625
Current State,action,reward,Response time,Next State:  (4, 10.369891240151098) 4 24.0 1363.1350387 (10.316955310454549, 6)
loss 2074.302734375
Current State,action,reward,Response time,Next State:  (6, 10.316955310454549) 2 24.0 1079.75307088 (10.333617326102203, 6)
loss 693.1334228515625
Current State,action,reward,Response time,Next State:  (6, 10.333617326102203) 3 23.0 1080.85508169 (10.390165524255663, 7)
loss 582.3849487304688
Current State,action,reward,Response time,Next State:  (7, 10.390165524255663) 2 23.0 1023.31030002 (10.425974763084863, 7)
loss 1517.7291259765625
Current State,action,reward,Response time,Next State:  (7, 10.425974763084863) 2 23.0 1025.54181472 (10.546025383098053, 7)
loss 1410.4549560546875
Current State,action,reward,Response time,Next State:  (7, 10.546025383098053) 0 25.0 1033.02297692 (10.655373370049301, 5)
loss 1972.80517578125
Current State,action,reward,Response time,Next State:  (5, 10.655373370049301) 1 26.0 1236.60417082 (10.624473674922116, 4)
loss 1722.883544921875
Current State,action,reward,Response time,Next State:  (4, 10.624473674922116) 4 24.0 1392.05672091 (10.771376986314287, 6)
loss 1760.4666748046875
Current State,action,reward,Response time,Next State:  (6, 10.771376986314287) 3 23.0 1109.80811426 (10.924797168745895, 7)
loss 1635.4136962890625
Current State,action,reward,Response time,Next State:  (7, 10.924797168745895) 2 23.0 1056.62679639 (11.039747673816453, 7)
loss 1430.2880859375
Current State,action,reward,Response time,Next State:  (7, 11.039747673816453) 1 24.0 1063.7901361 (11.271571944085663, 6)
loss 636.5215454101562
Current State,action,reward,Response time,Next State:  (6, 11.271571944085663) 4 22.0 1142.8905616 (11.670334358779868, 8)
loss 1943.511474609375
Current State,action,reward,Response time,Next State:  (8, 11.670334358779868) 4 20.0 1053.11377918 (11.819721938468785, 10)
loss 1918.55419921875
Current State,action,reward,Response time,Next State:  (10, 11.819721938468785) 0 22.0 1004.62682792 (12.19918626616789, 8)
loss 605.7969360351562
Current State,action,reward,Response time,Next State:  (8, 12.19918626616789) 4 20.0 1084.02242049 (12.501496275411796, 10)
loss 730.6717529296875
Action +2 not possible so Scaled up by 1
Current State,action,reward,Response time,Next State:  (10, 12.501496275411796) 4 19.0 1040.79092857 (13.168618569876575, 11)
loss 616.244873046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 779.0072631835938
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 0 21.0 1089.37925646 (14.283719188889453, 9)
loss 1644.86181640625
Current State,action,reward,Response time,Next State:  (9, 14.283719188889453) 0 23.0 1170.79974938 (14.677479537099185, 7)
loss 2354.440673828125
Current State,action,reward,Response time,Next State:  (7, 14.677479537099185) 3 22.0 1290.48169407 (15.353965082180355, 8)
loss 2285.81201171875
Current State,action,reward,Response time,Next State:  (8, 15.353965082180355) 3 21.0 1268.40282167 (15.836943704090487, 9)
loss 1653.578857421875
Current State,action,reward,Response time,Next State:  (9, 15.836943704090487) 2 21.0 1252.10338759 (16.466876895473597, 9)
loss 1094.156005859375
Current State,action,reward,Response time,Next State:  (9, 16.466876895473597) 1 22.0 1285.07728144 (16.871606159345866, 8)
loss 643.7351684570312
Current State,action,reward,Response time,Next State:  (8, 16.871606159345866) 0 24.0 1357.1010433 (17.534967586021782, 6)
loss 1692.4853515625
Current State,action,reward,Response time,Next State:  (6, 17.534967586021782) 3 23.0 1557.14594988 (17.669285735563751, 7)
loss 2001.1768798828125
Current State,action,reward,Response time,Next State:  (7, 17.669285735563751) 4 21.0 1476.92127615 (17.944480812078613, 9)
loss 1308.3577880859375
Current State,action,reward,Response time,Next State:  (9, 17.944480812078613) 3 20.0 1362.4225545 (18.385807405229915, 10)
loss 606.5562133789062
Current State,action,reward,Response time,Next State:  (10, 18.385807405229915) 2 20.0 1352.9188695 (18.671267839956315, 10)
loss 1196.76513671875
Current State,action,reward,Response time,Next State:  (10, 18.671267839956315) 0 22.0 1368.06085906 (19.02839494033929, 8)
loss 1438.4970703125
Current State,action,reward,Response time,Next State:  (8, 19.02839494033929) 3 21.0 1483.15412147 (19.286321916040979, 9)
loss 919.7991333007812
Current State,action,reward,Response time,Next State:  (9, 19.286321916040979) 0 23.0 1432.66131431 (19.340464848017284, 7)
loss 1276.4814453125
Current State,action,reward,Response time,Next State:  (7, 19.340464848017284) 1 24.0 1581.06369535 (19.213467265587269, 6)
loss 887.04736328125
Current State,action,reward,Response time,Next State:  (6, 19.213467265587269) 4 22.0 1668.16041811 (19.140765783401285, 8)
loss 1226.0640869140625
Current State,action,reward,Response time,Next State:  (8, 19.140765783401285) 1 23.0 1489.72161235 (19.385636054792762, 7)
loss 1715.7342529296875
Current State,action,reward,Response time,Next State:  (7, 19.385636054792762) 3 22.0 1583.87861729 (19.223969507401588, 8)
loss 1387.5899658203125
Current State,action,reward,Response time,Next State:  (8, 19.223969507401588) 1 23.0 1494.58443695 (19.25591252280865, 7)
loss 1104.5140380859375
Current State,action,reward,Response time,Next State:  (7, 19.25591252280865) 2 23.0 1575.79467084 (19.08360399753829, 7)
loss 1068.193359375
Current State,action,reward,Response time,Next State:  (7, 19.08360399753829) 2 23.0 1565.05696683 (18.668181536495972, 7)
loss 1704.0164794921875
Current State,action,reward,Response time,Next State:  (7, 18.668181536495972) 0 25.0 1539.16919707 (18.375894992990247, 5)
loss 807.4918823242188
Current State,action,reward,Response time,Next State:  (5, 18.375894992990247) 3 24.0 1945.98284482 (17.82724819986867, 6)
loss 1308.978271484375
Current State,action,reward,Response time,Next State:  (6, 17.82724819986867) 4 22.0 1576.47712838 (17.229782241685768, 8)
loss 775.131591796875
Current State,action,reward,Response time,Next State:  (8, 17.229782241685768) 1 23.0 1378.03457101 (16.84211602880065, 7)
loss 1428.2408447265625
Current State,action,reward,Response time,Next State:  (7, 16.84211602880065) 1 24.0 1425.37476404 (16.237094554670044, 6)
loss 1831.09423828125
Current State,action,reward,Response time,Next State:  (6, 16.237094554670044) 0 26.0 1471.30578788 (15.950694610794756, 4)
loss 1074.7474365234375
Current State,action,reward,Response time,Next State:  (4, 15.950694610794756) 2 26.0 1997.13880134 (15.828704162850809, 4)
loss 1242.6671142578125
Current State,action,reward,Response time,Next State:  (4, 15.828704162850809) 3 25.0 1983.28015091 (15.550833128512703, 5)
loss 689.1039428710938
Current State,action,reward,Response time,Next State:  (5, 15.550833128512703) 1 26.0 1686.40988625 (15.446694946204717, 4)
loss 1589.677001953125
Current State,action,reward,Response time,Next State:  (4, 15.446694946204717) 2 26.0 1939.88222688 (15.750501603468638, 4)
loss 2371.841552734375
Current State,action,reward,Response time,Next State:  (4, 15.750501603468638) 4 24.0 1974.39599686 (15.817158911312735, 6)
loss 540.3707275390625
Current State,action,reward,Response time,Next State:  (6, 15.817158911312735) 3 23.0 1443.53161985 (15.829956988360925, 7)
loss 758.1177368164062
Current State,action,reward,Response time,Next State:  (7, 15.829956988360925) 3 22.0 1362.30032139 (15.892373986997768, 8)
loss 500.8818359375
Current State,action,reward,Response time,Next State:  (8, 15.892373986997768) 4 20.0 1299.87001973 (15.954793861767499, 10)
loss 1992.01611328125
Action +2 not possible so Scaled up by 1
Current State,action,reward,Response time,Next State:  (10, 15.954793861767499) 4 19.0 1223.96796344 (16.004586266677634, 11)
loss 693.6417846679688
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 1149.974365234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 1101.260009765625
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 2 19.0 1210.80462626 (16.11465619633363, 11)
loss 872.639892578125
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 0 21.0 1219.63501821 (16.147078378791146, 9)
loss 983.73046875
Current State,action,reward,Response time,Next State:  (9, 16.147078378791146) 0 23.0 1268.3374073 (16.229253414601111, 7)
loss 1746.01513671875
Current State,action,reward,Response time,Next State:  (7, 16.229253414601111) 3 22.0 1387.18316937 (16.295120821876548, 8)
loss 2519.1552734375
Current State,action,reward,Response time,Next State:  (8, 16.295120821876548) 4 20.0 1323.40847593 (16.667936385136993, 10)
loss 1067.5384521484375
Current State,action,reward,Response time,Next State:  (10, 16.667936385136993) 3 19.0 1261.79596106 (16.836383524612351, 11)
loss 971.7742309570312
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 4 19.0 1257.77263112 (16.845818065953559, 11)
loss 1114.3751220703125
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 0 21.0 1258.27117243 (17.052961248403161, 9)
loss 1974.986083984375
Current State,action,reward,Response time,Next State:  (9, 17.052961248403161) 3 20.0 1315.75590499 (17.215992726625572, 10)
loss 1827.0875244140625
############ Running episode number: 18  ##############
Action +2 not possible so Scaled up by 1
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 4 19.0 1029.06659875 (11.973514343585284, 11)
loss 3240.046630859375
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 2 19.0 1000.80824137 (11.786394321941378, 11)
loss 1023.9510498046875
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 4 19.0 990.920419923 (11.61852219546234, 11)
loss 1139.430908203125
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 2 19.0 982.049698353 (11.469111876584304, 11)
loss 751.1211547851562
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 2445.533203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 684.0624389648438
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 0 21.0 962.898956888 (11.027107764209074, 9)
loss 3180.24658203125
Current State,action,reward,Response time,Next State:  (9, 11.027107764209074) 4 19.0 1000.33221268 (10.995673623987257, 11)
loss 1253.712890625
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 2 19.0 949.137050055 (10.931193889570471, 11)
loss 1370.4095458984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 1062.3240966796875
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 4 19.0 939.691239608 (10.819208572963639, 11)
loss 1841.9635009765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 1460.871826171875
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 4 19.0 937.12351295 (10.772009508959538, 11)
loss 647.91796875
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 2 19.0 937.318160694 (10.644925616761762, 11)
loss 1312.1751708984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 1230.209716796875
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 4 19.0 927.560809977 (10.552868829802469, 11)
loss 1447.4720458984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 1683.47314453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 1363.4378662109375
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 4 19.0 922.369964659 (10.448897752470936, 11)
loss 3640.38720703125
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 0 21.0 920.244245637 (10.433149880183072, 9)
loss 1793.899169921875
Current State,action,reward,Response time,Next State:  (9, 10.433149880183072) 4 19.0 969.241448633 (10.44185150623065, 11)
loss 2208.023193359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 1400.279052734375
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 1 20.0 916.124940439 (10.42733414151318, 10)
loss 2481.1357421875
Current State,action,reward,Response time,Next State:  (10, 10.42733414151318) 0 22.0 930.768881517 (10.388469398680568, 8)
loss 1076.69091796875
Current State,action,reward,Response time,Next State:  (8, 10.388469398680568) 1 23.0 978.19544437 (10.344006106602812, 7)
loss 1065.889404296875
Current State,action,reward,Response time,Next State:  (7, 10.344006106602812) 3 22.0 1020.43379601 (10.319026962956018, 8)
loss 1379.5064697265625
Current State,action,reward,Response time,Next State:  (8, 10.319026962956018) 3 21.0 974.136895449 (10.30224719189987, 9)
loss 1888.9000244140625
Current State,action,reward,Response time,Next State:  (9, 10.30224719189987) 3 20.0 962.389338906 (10.278181486298042, 10)
loss 3516.64453125
Current State,action,reward,Response time,Next State:  (10, 10.278181486298042) 0 22.0 922.857214352 (10.268274366284802, 8)
loss 709.0746459960938
Current State,action,reward,Response time,Next State:  (8, 10.268274366284802) 3 21.0 971.170670341 (10.335411397720526, 9)
loss 458.7457275390625
Current State,action,reward,Response time,Next State:  (9, 10.335411397720526) 3 20.0 964.125321415 (10.305649118067803, 10)
loss 465.67962646484375
Current State,action,reward,Response time,Next State:  (10, 10.305649118067803) 1 21.0 924.314209939 (10.24826025489064, 9)
loss 673.7710571289062
Current State,action,reward,Response time,Next State:  (9, 10.24826025489064) 3 20.0 959.563389179 (10.276491935146446, 10)
loss 1156.638427734375
Current State,action,reward,Response time,Next State:  (10, 10.276491935146446) 2 20.0 922.767593645 (10.236991269871366, 10)
loss 1560.311767578125
Current State,action,reward,Response time,Next State:  (10, 10.236991269871366) 3 19.0 920.672316722 (10.236272697871373, 11)
loss 706.8489379882812
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 1 20.0 909.008683798 (10.369891240151098, 10)
loss 1485.03369140625
Action +2 not possible so Scaled up by 1
Current State,action,reward,Response time,Next State:  (10, 10.369891240151098) 4 19.0 927.721874973 (10.316955310454549, 11)
loss 1448.102294921875
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 2 19.0 913.272125304 (10.333617326102203, 11)
loss 508.2382507324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 1399.0301513671875
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 1 20.0 917.140709305 (10.425974763084863, 10)
loss 703.9369506835938
Action +2 not possible so Scaled up by 1
Current State,action,reward,Response time,Next State:  (10, 10.425974763084863) 4 19.0 930.696774523 (10.546025383098053, 11)
loss 647.5387573242188
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 1 20.0 925.376677007 (10.655373370049301, 10)
loss 621.4143676757812
Current State,action,reward,Response time,Next State:  (10, 10.655373370049301) 1 21.0 942.865015335 (10.624473674922116, 9)
loss 606.5134887695312
Current State,action,reward,Response time,Next State:  (9, 10.624473674922116) 3 20.0 979.256305105 (10.771376986314287, 10)
loss 1371.2545166015625
Current State,action,reward,Response time,Next State:  (10, 10.771376986314287) 2 20.0 949.018321829 (10.924797168745895, 10)
loss 659.3739013671875
Current State,action,reward,Response time,Next State:  (10, 10.924797168745895) 0 22.0 957.1563561 (11.039747673816453, 8)
loss 1303.2222900390625
Current State,action,reward,Response time,Next State:  (8, 11.039747673816453) 0 24.0 1016.25926965 (11.271571944085663, 6)
loss 662.8226928710938
Current State,action,reward,Response time,Next State:  (6, 11.271571944085663) 4 22.0 1142.8905616 (11.670334358779868, 8)
loss 553.6049194335938
Current State,action,reward,Response time,Next State:  (8, 11.670334358779868) 3 21.0 1053.11377918 (11.819721938468785, 9)
loss 1879.2197265625
Current State,action,reward,Response time,Next State:  (9, 11.819721938468785) 2 21.0 1041.82165315 (12.19918626616789, 9)
loss 508.42657470703125
Current State,action,reward,Response time,Next State:  (9, 12.19918626616789) 0 23.0 1061.68473805 (12.501496275411796, 7)
loss 622.1609497070312
Current State,action,reward,Response time,Next State:  (7, 12.501496275411796) 2 23.0 1154.88153049 (13.168618569876575, 7)
loss 839.5498657226562
Current State,action,reward,Response time,Next State:  (7, 13.168618569876575) 2 23.0 1196.45441106 (13.649658108197247, 7)
loss 743.4463500976562
Current State,action,reward,Response time,Next State:  (7, 13.649658108197247) 0 25.0 1226.43122257 (14.283719188889453, 5)
loss 981.6498413085938
Current State,action,reward,Response time,Next State:  (5, 14.283719188889453) 2 25.0 1569.98464322 (14.677479537099185, 5)
loss 1397.4425048828125
Current State,action,reward,Response time,Next State:  (5, 14.677479537099185) 4 23.0 1606.16421855 (15.353965082180355, 7)
loss 1054.477783203125
Current State,action,reward,Response time,Next State:  (7, 15.353965082180355) 1 24.0 1332.63806181 (15.836943704090487, 6)
loss 530.2630004882812
Current State,action,reward,Response time,Next State:  (6, 15.836943704090487) 4 22.0 1444.84016836 (16.466876895473597, 8)
loss 1705.638916015625
Current State,action,reward,Response time,Next State:  (8, 16.466876895473597) 4 20.0 1333.44672445 (16.871606159345866, 10)
loss 2298.351806640625
Current State,action,reward,Response time,Next State:  (10, 16.871606159345866) 1 21.0 1272.5994393 (17.534967586021782, 9)
loss 2032.0396728515625
Current State,action,reward,Response time,Next State:  (9, 17.534967586021782) 1 22.0 1340.98655806 (17.669285735563751, 8)
loss 916.8074951171875
Current State,action,reward,Response time,Next State:  (8, 17.669285735563751) 1 23.0 1403.72126261 (17.944480812078613, 7)
loss 763.9473266601562
Current State,action,reward,Response time,Next State:  (7, 17.944480812078613) 4 21.0 1494.0705337 (18.385807405229915, 9)
loss 1209.1817626953125
Current State,action,reward,Response time,Next State:  (9, 18.385807405229915) 0 23.0 1385.5238237 (18.671267839956315, 7)
loss 561.2474365234375
Current State,action,reward,Response time,Next State:  (7, 18.671267839956315) 1 24.0 1539.36152541 (19.02839494033929, 6)
loss 1249.9700927734375
Current State,action,reward,Response time,Next State:  (6, 19.02839494033929) 4 22.0 1655.91989997 (19.286321916040979, 8)
loss 1802.862548828125
Current State,action,reward,Response time,Next State:  (8, 19.286321916040979) 2 22.0 1498.22861069 (19.340464848017284, 8)
loss 977.3397216796875
Current State,action,reward,Response time,Next State:  (8, 19.340464848017284) 2 22.0 1501.39298325 (19.213467265587269, 8)
loss 1951.443603515625
Current State,action,reward,Response time,Next State:  (8, 19.213467265587269) 2 22.0 1493.97063558 (19.140765783401285, 8)
loss 1055.7833251953125
Current State,action,reward,Response time,Next State:  (8, 19.140765783401285) 3 21.0 1489.72161235 (19.385636054792762, 9)
loss 950.844970703125
Current State,action,reward,Response time,Next State:  (9, 19.385636054792762) 3 20.0 1437.85991935 (19.223969507401588, 10)
loss 1095.6409912109375
Current State,action,reward,Response time,Next State:  (10, 19.223969507401588) 1 21.0 1397.37841716 (19.25591252280865, 9)
loss 964.7601928710938
Current State,action,reward,Response time,Next State:  (9, 19.25591252280865) 4 19.0 1431.06953264 (19.08360399753829, 11)
loss 1248.7652587890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 1965.71044921875
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 2 19.0 1354.56874896 (18.375894992990247, 11)
loss 1094.9312744140625
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 2 19.0 1339.12370397 (17.82724819986867, 11)
loss 1586.340087890625
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 4 19.0 1310.13203606 (17.229782241685768, 11)
loss 770.01220703125
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 2 19.0 1278.56065924 (16.84211602880065, 11)
loss 3308.751220703125
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 2 19.0 1258.07554888 (16.237094554670044, 11)
loss 1146.2840576171875
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 0 21.0 1226.10492247 (15.950694610794756, 9)
loss 882.1293334960938
Current State,action,reward,Response time,Next State:  (9, 15.950694610794756) 3 20.0 1258.0576862 (15.828704162850809, 10)
loss 480.2372131347656
Current State,action,reward,Response time,Next State:  (10, 15.828704162850809) 3 19.0 1217.27964986 (15.550833128512703, 11)
loss 905.755859375
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 2 19.0 1189.84140354 (15.446694946204717, 11)
loss 1669.9505615234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 446.6666259765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 2488.876708984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 648.9280395507812
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 2609.125
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 4 19.0 1207.88915169 (15.954793861767499, 11)
loss 1730.2633056640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 1778.26513671875
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 0 21.0 1213.81868812 (16.017694914042416, 9)
loss 2515.246826171875
Current State,action,reward,Response time,Next State:  (9, 16.017694914042416) 3 20.0 1261.56482143 (15.947547279389703, 10)
loss 1084.637939453125
Current State,action,reward,Response time,Next State:  (10, 15.947547279389703) 2 20.0 1223.58357506 (16.11465619633363, 10)
loss 920.90625
Current State,action,reward,Response time,Next State:  (10, 16.11465619633363) 2 20.0 1232.44771583 (16.147078378791146, 10)
loss 854.91845703125
Current State,action,reward,Response time,Next State:  (10, 16.147078378791146) 2 20.0 1234.16752106 (16.229253414601111, 10)
loss 1234.56396484375
Current State,action,reward,Response time,Next State:  (10, 16.229253414601111) 3 19.0 1238.52642122 (16.295120821876548, 11)
loss 483.51422119140625
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 4 19.0 1229.17115431 (16.667936385136993, 11)
loss 1242.02783203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 1646.2325439453125
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 0 21.0 1257.77263112 (16.845818065953559, 9)
loss 1695.454345703125
Current State,action,reward,Response time,Next State:  (9, 16.845818065953559) 3 20.0 1304.91298164 (17.052961248403161, 10)
loss 891.2587280273438
Current State,action,reward,Response time,Next State:  (10, 17.052961248403161) 0 22.0 1282.21925533 (17.215992726625572, 8)
loss 1285.096435546875
############ Running episode number: 19  ##############
Action +2 not possible so Scaled up by 1
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 4 19.0 1029.06659875 (11.973514343585284, 11)
loss 1130.935302734375
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 2 19.0 1000.80824137 (11.786394321941378, 11)
loss 974.7355346679688
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 1368.3800048828125
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 1 20.0 982.049698353 (11.469111876584304, 10)
loss 528.2539672851562
Current State,action,reward,Response time,Next State:  (10, 11.469111876584304) 3 19.0 986.02903554 (11.336751742492702, 11)
loss 647.4860229492188
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 0 21.0 967.160346038 (11.25610796929319, 9)
loss 676.140380859375
Current State,action,reward,Response time,Next State:  (9, 11.25610796929319) 4 19.0 1012.3192433 (11.027107764209074, 11)
loss 677.1612548828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 656.3413696289062
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 2302.21142578125
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 4 19.0 945.729803224 (10.816918347608043, 11)
loss 3685.53662109375
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 2 19.0 939.691239608 (10.819208572963639, 11)
loss 1305.4873046875
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 2 19.0 939.812260006 (10.768325938188134, 11)
loss 1575.84716796875
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 1 20.0 937.12351295 (10.772009508959538, 10)
loss 1253.556396484375
Current State,action,reward,Response time,Next State:  (10, 10.772009508959538) 0 22.0 949.051873418 (10.644925616761762, 8)
loss 615.522705078125
Current State,action,reward,Response time,Next State:  (8, 10.644925616761762) 2 22.0 993.183975462 (10.58735855349979, 8)
loss 1328.6859130859375
Current State,action,reward,Response time,Next State:  (8, 10.58735855349979) 4 20.0 989.819480251 (10.552868829802469, 10)
loss 1608.31591796875
Current State,action,reward,Response time,Next State:  (10, 10.552868829802469) 3 19.0 937.427755072 (10.553846649940214, 11)
loss 1662.5555419921875
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 1 20.0 925.789969445 (10.489125480251131, 10)
loss 879.5840454101562
Current State,action,reward,Response time,Next State:  (10, 10.489125480251131) 3 19.0 934.046546974 (10.448897752470936, 11)
loss 918.7042846679688
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 4 19.0 920.244245637 (10.433149880183072, 11)
loss 2032.2603759765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 654.5286865234375
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 4 19.0 919.871906942 (10.370942817486826, 11)
loss 625.3907470703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 3105.745361328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 853.7304077148438
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 1021.3941650390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 1069.6043701171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 1753.7294921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 1296.592529296875
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 1 20.0 911.223233653 (10.268274366284802, 10)
loss 2821.270751953125
Action +2 not possible so Scaled up by 1
Current State,action,reward,Response time,Next State:  (10, 10.268274366284802) 4 19.0 922.331700166 (10.335411397720526, 11)
loss 1740.126953125
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 4 19.0 914.247384359 (10.305649118067803, 11)
loss 576.6605224609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 1141.024169921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 542.946533203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 1171.244873046875
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 2 19.0 909.046654676 (10.236272697871373, 11)
loss 1285.5693359375
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 1 20.0 909.008683798 (10.369891240151098, 10)
loss 999.637939453125
Action +2 not possible so Scaled up by 1
Current State,action,reward,Response time,Next State:  (10, 10.369891240151098) 4 19.0 927.721874973 (10.316955310454549, 11)
loss 751.821044921875
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 2 19.0 913.272125304 (10.333617326102203, 11)
loss 623.5265502929688
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 0 21.0 914.152581784 (10.390165524255663, 9)
loss 675.9763793945312
Current State,action,reward,Response time,Next State:  (9, 10.390165524255663) 3 20.0 966.991429728 (10.425974763084863, 10)
loss 746.1220703125
Current State,action,reward,Response time,Next State:  (10, 10.425974763084863) 3 19.0 930.696774523 (10.546025383098053, 11)
loss 1033.3704833984375
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 4 19.0 925.376677007 (10.655373370049301, 11)
loss 1048.43115234375
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 0 21.0 931.154858096 (10.624473674922116, 9)
loss 1284.3902587890625
Current State,action,reward,Response time,Next State:  (9, 10.624473674922116) 2 21.0 979.256305105 (10.771376986314287, 9)
loss 674.9376831054688
Current State,action,reward,Response time,Next State:  (9, 10.771376986314287) 3 20.0 986.945968488 (10.924797168745895, 10)
loss 559.475830078125
Current State,action,reward,Response time,Next State:  (10, 10.924797168745895) 1 21.0 957.1563561 (11.039747673816453, 9)
loss 503.38885498046875
Current State,action,reward,Response time,Next State:  (9, 11.039747673816453) 2 21.0 1000.99384957 (11.271571944085663, 9)
loss 1968.5986328125
Current State,action,reward,Response time,Next State:  (9, 11.271571944085663) 3 20.0 1013.12870607 (11.670334358779868, 10)
loss 523.8052368164062
Current State,action,reward,Response time,Next State:  (10, 11.670334358779868) 2 20.0 996.702699398 (11.819721938468785, 10)
loss 983.228759765625
Current State,action,reward,Response time,Next State:  (10, 11.819721938468785) 1 21.0 1004.62682792 (12.19918626616789, 9)
loss 615.3446655273438
Current State,action,reward,Response time,Next State:  (9, 12.19918626616789) 4 19.0 1061.68473805 (12.501496275411796, 11)
loss 1637.38037109375
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 4 19.0 1028.70793389 (13.168618569876575, 11)
loss 595.8036499023438
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 1 20.0 1063.96010023 (13.649658108197247, 10)
loss 752.7529907226562
Current State,action,reward,Response time,Next State:  (10, 13.649658108197247) 3 19.0 1101.69413046 (14.283719188889453, 11)
loss 592.0457763671875
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 2 19.0 1122.88439768 (14.677479537099185, 11)
loss 1578.735107421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 1023.0661010742188
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 0 21.0 1179.43847566 (15.836943704090487, 9)
loss 3087.859619140625
Current State,action,reward,Response time,Next State:  (9, 15.836943704090487) 0 23.0 1252.10338759 (16.466876895473597, 7)
loss 492.7810363769531
Current State,action,reward,Response time,Next State:  (7, 16.466876895473597) 3 22.0 1401.99108791 (16.871606159345866, 8)
loss 1421.373779296875
Current State,action,reward,Response time,Next State:  (8, 16.871606159345866) 2 22.0 1357.1010433 (17.534967586021782, 8)
loss 891.9248657226562
Current State,action,reward,Response time,Next State:  (8, 17.534967586021782) 4 20.0 1395.8710659 (17.669285735563751, 10)
loss 438.6496276855469
Current State,action,reward,Response time,Next State:  (10, 17.669285735563751) 3 19.0 1314.91162813 (17.944480812078613, 11)
loss 451.3800048828125
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 4 19.0 1316.32685758 (18.385807405229915, 11)
loss 1340.3792724609375
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 2 19.0 1339.64749699 (18.671267839956315, 11)
loss 494.76873779296875
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 4 19.0 1354.73183582 (19.02839494033929, 11)
loss 2011.6285400390625
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 2 19.0 1373.60319427 (19.286321916040979, 11)
loss 946.7479248046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 1227.6630859375
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 1 20.0 1390.09363446 (19.213467265587269, 10)
loss 1160.4390869140625
Action +2 not possible so Scaled up by 1
Current State,action,reward,Response time,Next State:  (10, 19.213467265587269) 4 19.0 1396.82133527 (19.140765783401285, 11)
loss 1323.1658935546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 1506.2061767578125
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 0 21.0 1392.48057747 (19.223969507401588, 9)
loss 1162.510986328125
Current State,action,reward,Response time,Next State:  (9, 19.223969507401588) 0 23.0 1429.39747342 (19.25591252280865, 7)
loss 741.8635864257812
Current State,action,reward,Response time,Next State:  (7, 19.25591252280865) 4 21.0 1575.79467084 (19.08360399753829, 9)
loss 588.3028564453125
Current State,action,reward,Response time,Next State:  (9, 19.08360399753829) 1 22.0 1422.05003169 (18.668181536495972, 8)
loss 2727.539794921875
Current State,action,reward,Response time,Next State:  (8, 18.668181536495972) 3 21.0 1462.10152292 (18.375894992990247, 9)
loss 2163.059814453125
Current State,action,reward,Response time,Next State:  (9, 18.375894992990247) 2 21.0 1385.00495784 (17.82724819986867, 9)
loss 960.9374389648438
Current State,action,reward,Response time,Next State:  (9, 17.82724819986867) 3 20.0 1356.28600579 (17.229782241685768, 10)
loss 2116.144775390625
Current State,action,reward,Response time,Next State:  (10, 17.229782241685768) 3 19.0 1291.59856437 (16.84211602880065, 11)
loss 757.0545654296875
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 4 19.0 1258.07554888 (16.237094554670044, 11)
loss 562.3843994140625
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 0 21.0 1226.10492247 (15.950694610794756, 9)
loss 2407.177978515625
Current State,action,reward,Response time,Next State:  (9, 15.950694610794756) 4 19.0 1258.0576862 (15.828704162850809, 11)
loss 798.2615966796875
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 0 21.0 1204.52470225 (15.550833128512703, 9)
loss 1751.3046875
Current State,action,reward,Response time,Next State:  (9, 15.550833128512703) 3 20.0 1237.12691092 (15.446694946204717, 10)
loss 1072.9097900390625
Current State,action,reward,Response time,Next State:  (10, 15.446694946204717) 1 21.0 1197.01631782 (15.750501603468638, 9)
loss 515.155517578125
Current State,action,reward,Response time,Next State:  (9, 15.750501603468638) 0 23.0 1247.57857022 (15.817158911312735, 7)
loss 1281.4534912109375
Current State,action,reward,Response time,Next State:  (7, 15.817158911312735) 3 22.0 1361.50278706 (15.829956988360925, 8)
loss 479.9794921875
Current State,action,reward,Response time,Next State:  (8, 15.829956988360925) 2 22.0 1296.22207104 (15.892373986997768, 8)
loss 1093.3436279296875
Current State,action,reward,Response time,Next State:  (8, 15.892373986997768) 3 21.0 1299.87001973 (15.954793861767499, 9)
loss 1897.7666015625
Current State,action,reward,Response time,Next State:  (9, 15.954793861767499) 0 23.0 1258.27226176 (16.004586266677634, 7)
loss 1694.361083984375
Current State,action,reward,Response time,Next State:  (7, 16.004586266677634) 4 21.0 1373.18264715 (16.017694914042416, 9)
loss 1849.21142578125
Current State,action,reward,Response time,Next State:  (9, 16.017694914042416) 1 22.0 1261.56482143 (15.947547279389703, 8)
loss 1904.6573486328125
Current State,action,reward,Response time,Next State:  (8, 15.947547279389703) 3 21.0 1303.0946115 (16.11465619633363, 9)
loss 1056.6484375
Current State,action,reward,Response time,Next State:  (9, 16.11465619633363) 4 19.0 1266.64026605 (16.147078378791146, 11)
loss 2395.018310546875
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 4 19.0 1221.34827555 (16.229253414601111, 11)
loss 661.9581298828125
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 2 19.0 1225.69057988 (16.295120821876548, 11)
loss 453.42071533203125
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 1 20.0 1229.17115431 (16.667936385136993, 10)
loss 635.55908203125
Current State,action,reward,Response time,Next State:  (10, 16.667936385136993) 3 19.0 1261.79596106 (16.836383524612351, 11)
loss 1159.9915771484375
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 1 20.0 1257.77263112 (16.845818065953559, 10)
loss 475.04156494140625
Current State,action,reward,Response time,Next State:  (10, 16.845818065953559) 0 22.0 1271.23153331 (17.052961248403161, 8)
loss 592.7505493164062
Current State,action,reward,Response time,Next State:  (8, 17.052961248403161) 3 21.0 1367.70030431 (17.215992726625572, 9)
loss 1036.2685546875
############ Running episode number: 20  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 2 20.0 1029.06659875 (11.973514343585284, 10)
loss 1056.163818359375
Current State,action,reward,Response time,Next State:  (10, 11.973514343585284) 2 20.0 1012.7846064 (11.786394321941378, 10)
loss 616.9583129882812
Current State,action,reward,Response time,Next State:  (10, 11.786394321941378) 2 20.0 1002.85899476 (11.61852219546234, 10)
loss 1077.5511474609375
Current State,action,reward,Response time,Next State:  (10, 11.61852219546234) 2 20.0 993.95437024 (11.469111876584304, 10)
loss 1816.160888671875
Current State,action,reward,Response time,Next State:  (10, 11.469111876584304) 3 19.0 986.02903554 (11.336751742492702, 11)
loss 1882.5040283203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 604.790283203125
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 0 21.0 962.898956888 (11.027107764209074, 9)
loss 488.7461853027344
Current State,action,reward,Response time,Next State:  (9, 11.027107764209074) 3 20.0 1000.33221268 (10.995673623987257, 10)
loss 1063.5531005859375
Current State,action,reward,Response time,Next State:  (10, 10.995673623987257) 2 20.0 960.915933313 (10.931193889570471, 10)
loss 474.2646484375
Current State,action,reward,Response time,Next State:  (10, 10.931193889570471) 3 19.0 957.495664348 (10.816918347608043, 11)
loss 1482.991943359375
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 2 19.0 939.691239608 (10.819208572963639, 11)
loss 452.9300231933594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 443.5950622558594
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 2 19.0 937.12351295 (10.772009508959538, 11)
loss 1261.6856689453125
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 4 19.0 937.318160694 (10.644925616761762, 11)
loss 1130.073974609375
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 0 21.0 930.602776506 (10.58735855349979, 9)
loss 1265.7161865234375
Current State,action,reward,Response time,Next State:  (9, 10.58735855349979) 4 19.0 977.313511661 (10.552868829802469, 11)
loss 1177.6328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 1587.1763916015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 2117.085693359375
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 4 19.0 922.369964659 (10.448897752470936, 11)
loss 618.4920654296875
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 2 19.0 920.244245637 (10.433149880183072, 11)
loss 476.1595458984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 2363.428955078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 1117.9864501953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 2993.796875
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 1 20.0 919.104778396 (10.388469398680568, 10)
loss 1002.1323852539062
Current State,action,reward,Response time,Next State:  (10, 10.388469398680568) 3 19.0 928.707336523 (10.344006106602812, 11)
loss 1586.58251953125
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 0 21.0 914.701547126 (10.319026962956018, 9)
loss 1458.9852294921875
Current State,action,reward,Response time,Next State:  (9, 10.319026962956018) 1 22.0 963.267677113 (10.30224719189987, 8)
loss 568.4680786132812
Current State,action,reward,Response time,Next State:  (8, 10.30224719189987) 4 20.0 973.15620517 (10.278181486298042, 10)
loss 2236.240234375
Current State,action,reward,Response time,Next State:  (10, 10.278181486298042) 2 20.0 922.857214352 (10.268274366284802, 10)
loss 622.9203491210938
Current State,action,reward,Response time,Next State:  (10, 10.268274366284802) 3 19.0 922.331700166 (10.335411397720526, 11)
loss 2712.51953125
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 2 19.0 914.247384359 (10.305649118067803, 11)
loss 2304.268798828125
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 2 19.0 912.67468196 (10.24826025489064, 11)
loss 990.5455322265625
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 4 19.0 909.642131904 (10.276491935146446, 11)
loss 1744.91064453125
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 2 19.0 911.133954163 (10.236991269871366, 11)
loss 641.2744750976562
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 1 20.0 909.046654676 (10.236272697871373, 10)
loss 583.8527221679688
Current State,action,reward,Response time,Next State:  (10, 10.236272697871373) 3 19.0 920.634200723 (10.369891240151098, 11)
loss 1159.347900390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 1204.404296875
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 2 19.0 913.272125304 (10.333617326102203, 11)
loss 1119.2279052734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 511.3785095214844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 509.0332946777344
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 4 19.0 919.032945938 (10.546025383098053, 11)
loss 1364.5047607421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 583.2313232421875
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 4 19.0 931.154858096 (10.624473674922116, 11)
loss 1338.091064453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 986.0167236328125
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 1 20.0 937.284736847 (10.924797168745895, 10)
loss 886.7150268554688
Current State,action,reward,Response time,Next State:  (10, 10.924797168745895) 0 22.0 957.1563561 (11.039747673816453, 8)
loss 1779.8076171875
Current State,action,reward,Response time,Next State:  (8, 11.039747673816453) 2 22.0 1016.25926965 (11.271571944085663, 8)
loss 1242.693359375
Current State,action,reward,Response time,Next State:  (8, 11.271571944085663) 1 23.0 1029.8081916 (11.670334358779868, 7)
loss 1267.45849609375
Current State,action,reward,Response time,Next State:  (7, 11.670334358779868) 1 24.0 1103.08623692 (11.819721938468785, 6)
loss 974.7079467773438
Current State,action,reward,Response time,Next State:  (6, 11.819721938468785) 0 26.0 1179.14471218 (12.19918626616789, 4)
loss 1168.7550048828125
Current State,action,reward,Response time,Next State:  (4, 12.19918626616789) 2 26.0 1570.95098426 (12.501496275411796, 4)
loss 1848.44970703125
Current State,action,reward,Response time,Next State:  (4, 12.501496275411796) 4 24.0 1605.29472846 (13.168618569876575, 6)
loss 795.8734741210938
Current State,action,reward,Response time,Next State:  (6, 13.168618569876575) 3 23.0 1268.35952947 (13.649658108197247, 7)
loss 1141.32666015625
Current State,action,reward,Response time,Next State:  (7, 13.649658108197247) 1 24.0 1226.43122257 (14.283719188889453, 6)
loss 1768.2958984375
Current State,action,reward,Response time,Next State:  (6, 14.283719188889453) 0 26.0 1342.11128751 (14.677479537099185, 4)
loss 1222.8885498046875
Current State,action,reward,Response time,Next State:  (4, 14.677479537099185) 3 25.0 1852.49597955 (15.353965082180355, 5)
loss 664.306640625
Current State,action,reward,Response time,Next State:  (5, 15.353965082180355) 4 23.0 1668.32121291 (15.836943704090487, 7)
loss 1702.578369140625
Current State,action,reward,Response time,Next State:  (7, 15.836943704090487) 1 24.0 1362.73571067 (16.466876895473597, 6)
loss 554.5890502929688
Current State,action,reward,Response time,Next State:  (6, 16.466876895473597) 3 23.0 1486.50338648 (16.871606159345866, 7)
loss 1793.764404296875
Current State,action,reward,Response time,Next State:  (7, 16.871606159345866) 1 24.0 1427.21249257 (17.534967586021782, 6)
loss 2238.414794921875
Current State,action,reward,Response time,Next State:  (6, 17.534967586021782) 3 23.0 1557.14594988 (17.669285735563751, 7)
loss 885.138916015625
Current State,action,reward,Response time,Next State:  (7, 17.669285735563751) 3 22.0 1476.92127615 (17.944480812078613, 8)
loss 659.0113525390625
Current State,action,reward,Response time,Next State:  (8, 17.944480812078613) 3 21.0 1419.80498244 (18.385807405229915, 9)
loss 2022.204345703125
Current State,action,reward,Response time,Next State:  (9, 18.385807405229915) 1 22.0 1385.5238237 (18.671267839956315, 8)
loss 686.633056640625
Current State,action,reward,Response time,Next State:  (8, 18.671267839956315) 4 20.0 1462.2819013 (19.02839494033929, 10)
loss 2168.677734375
Current State,action,reward,Response time,Next State:  (10, 19.02839494033929) 0 22.0 1387.00434183 (19.286321916040979, 8)
loss 991.0918579101562
Current State,action,reward,Response time,Next State:  (8, 19.286321916040979) 3 21.0 1498.22861069 (19.340464848017284, 9)
loss 1201.90673828125
Current State,action,reward,Response time,Next State:  (9, 19.340464848017284) 3 20.0 1435.4954296 (19.213467265587269, 10)
loss 1208.3466796875
Current State,action,reward,Response time,Next State:  (10, 19.213467265587269) 2 20.0 1396.82133527 (19.140765783401285, 10)
loss 1296.7279052734375
Current State,action,reward,Response time,Next State:  (10, 19.140765783401285) 3 19.0 1392.96495117 (19.385636054792762, 11)
loss 1857.029052734375
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 4 19.0 1392.48057747 (19.223969507401588, 11)
loss 1017.5435791015625
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 4 19.0 1383.93777195 (19.25591252280865, 11)
loss 508.70269775390625
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 4 19.0 1385.62570908 (19.08360399753829, 11)
loss 962.9556274414062
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 1262.5321044921875
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 1 20.0 1354.56874896 (18.375894992990247, 10)
loss 433.73101806640625
Action +2 not possible so Scaled up by 1
Current State,action,reward,Response time,Next State:  (10, 18.375894992990247) 4 19.0 1352.39307459 (17.82724819986867, 11)
loss 467.9590148925781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 1577.3275146484375
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 2 19.0 1278.56065924 (16.84211602880065, 11)
loss 1581.701171875
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 4 19.0 1258.07554888 (16.237094554670044, 11)
loss 616.4605102539062
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 4 19.0 1226.10492247 (15.950694610794756, 11)
loss 1149.94091796875
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 2 19.0 1210.97093797 (15.828704162850809, 11)
loss 569.052001953125
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 4 19.0 1204.52470225 (15.550833128512703, 11)
loss 1308.70751953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 1353.035400390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 638.0604248046875
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 4 19.0 1200.39231205 (15.817158911312735, 11)
loss 1479.8004150390625
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 1 20.0 1203.91462651 (15.829956988360925, 10)
loss 1786.7911376953125
Current State,action,reward,Response time,Next State:  (10, 15.829956988360925) 0 22.0 1217.34610485 (15.892373986997768, 8)
loss 1771.4517822265625
Current State,action,reward,Response time,Next State:  (8, 15.892373986997768) 4 20.0 1299.87001973 (15.954793861767499, 10)
loss 596.6646118164062
Current State,action,reward,Response time,Next State:  (10, 15.954793861767499) 2 20.0 1223.96796344 (16.004586266677634, 10)
loss 1742.54638671875
Current State,action,reward,Response time,Next State:  (10, 16.004586266677634) 1 21.0 1226.60915635 (16.017694914042416, 9)
loss 1170.5921630859375
Current State,action,reward,Response time,Next State:  (9, 16.017694914042416) 4 19.0 1261.56482143 (15.947547279389703, 11)
loss 921.9517822265625
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 4 19.0 1210.80462626 (16.11465619633363, 11)
loss 1385.64501953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 1876.910400390625
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 2 19.0 1221.34827555 (16.229253414601111, 11)
loss 610.0359497070312
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 2 19.0 1225.69057988 (16.295120821876548, 11)
loss 1948.2435302734375
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 1 20.0 1229.17115431 (16.667936385136993, 10)
loss 1477.365478515625
Current State,action,reward,Response time,Next State:  (10, 16.667936385136993) 0 22.0 1261.79596106 (16.836383524612351, 8)
loss 2134.44384765625
Current State,action,reward,Response time,Next State:  (8, 16.836383524612351) 0 24.0 1355.04246364 (16.845818065953559, 6)
loss 1189.8125
Current State,action,reward,Response time,Next State:  (6, 16.845818065953559) 0 13.7638869682 1511.56621672 (17.052961248403161, 4)
loss 676.2556762695312
Current State,action,reward,Response time,Next State:  (4, 17.052961248403161) 1 -101.631378022 2122.36113032 (17.215992726625572, 3)
loss 655.6417846679688
############ Running episode number: 21  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 2 20.0 1029.06659875 (11.973514343585284, 10)
loss 922.0081176757812
Current State,action,reward,Response time,Next State:  (10, 11.973514343585284) 0 22.0 1012.7846064 (11.786394321941378, 8)
loss 1091.29541015625
Current State,action,reward,Response time,Next State:  (8, 11.786394321941378) 4 20.0 1059.89687994 (11.61852219546234, 10)
loss 670.3671875
Current State,action,reward,Response time,Next State:  (10, 11.61852219546234) 3 19.0 993.95437024 (11.469111876584304, 11)
loss 478.3925476074219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 1520.7989501953125
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 0 21.0 967.160346038 (11.25610796929319, 9)
loss 2455.231689453125
Current State,action,reward,Response time,Next State:  (9, 11.25610796929319) 0 23.0 1012.3192433 (11.027107764209074, 7)
loss 1395.0074462890625
Current State,action,reward,Response time,Next State:  (7, 11.027107764209074) 1 24.0 1063.00245825 (10.995673623987257, 6)
loss 2539.0078125
Current State,action,reward,Response time,Next State:  (6, 10.995673623987257) 1 25.0 1124.64289336 (10.931193889570471, 5)
loss 1954.60009765625
Current State,action,reward,Response time,Next State:  (5, 10.931193889570471) 4 23.0 1261.94717276 (10.816918347608043, 7)
loss 1398.09814453125
Current State,action,reward,Response time,Next State:  (7, 10.816918347608043) 3 22.0 1049.90414092 (10.819208572963639, 8)
loss 491.6440734863281
Current State,action,reward,Response time,Next State:  (8, 10.819208572963639) 1 23.0 1003.36990711 (10.768325938188134, 7)
loss 1723.62890625
Current State,action,reward,Response time,Next State:  (7, 10.768325938188134) 4 21.0 1046.87602081 (10.772009508959538, 9)
loss 1257.4398193359375
Current State,action,reward,Response time,Next State:  (9, 10.772009508959538) 2 21.0 986.979077927 (10.644925616761762, 9)
loss 654.0562133789062
Current State,action,reward,Response time,Next State:  (9, 10.644925616761762) 3 20.0 980.32686333 (10.58735855349979, 10)
loss 1191.21630859375
Current State,action,reward,Response time,Next State:  (10, 10.58735855349979) 1 21.0 939.257231149 (10.552868829802469, 9)
loss 664.0090942382812
Current State,action,reward,Response time,Next State:  (9, 10.552868829802469) 3 20.0 975.508144832 (10.553846649940214, 10)
loss 1674.78125
Current State,action,reward,Response time,Next State:  (10, 10.553846649940214) 1 21.0 937.479622653 (10.489125480251131, 9)
loss 757.3257446289062
Current State,action,reward,Response time,Next State:  (9, 10.489125480251131) 1 22.0 972.171495057 (10.448897752470936, 8)
loss 760.561767578125
Current State,action,reward,Response time,Next State:  (8, 10.448897752470936) 2 22.0 981.727167119 (10.433149880183072, 8)
loss 1876.0887451171875
Current State,action,reward,Response time,Next State:  (8, 10.433149880183072) 3 21.0 980.806785952 (10.44185150623065, 9)
loss 2783.73828125
Current State,action,reward,Response time,Next State:  (9, 10.44185150623065) 3 20.0 969.696935814 (10.370942817486826, 10)
loss 506.56915283203125
Current State,action,reward,Response time,Next State:  (10, 10.370942817486826) 3 19.0 927.777654938 (10.42733414151318, 11)
loss 617.3856201171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 2173.333740234375
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 4 19.0 917.051082408 (10.344006106602812, 11)
loss 570.8330078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 1316.972412109375
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 1 20.0 913.381595845 (10.30224719189987, 10)
loss 1303.892333984375
Current State,action,reward,Response time,Next State:  (10, 10.30224719189987) 3 19.0 924.133757854 (10.278181486298042, 11)
loss 1054.810791015625
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 1 20.0 911.223233653 (10.268274366284802, 10)
loss 988.348876953125
Current State,action,reward,Response time,Next State:  (10, 10.268274366284802) 0 22.0 922.331700166 (10.335411397720526, 8)
loss 801.2854614257812
Current State,action,reward,Response time,Next State:  (8, 10.335411397720526) 3 21.0 975.09448038 (10.305649118067803, 9)
loss 1611.913818359375
Current State,action,reward,Response time,Next State:  (9, 10.305649118067803) 0 23.0 962.567412952 (10.24826025489064, 7)
loss 517.8058471679688
Current State,action,reward,Response time,Next State:  (7, 10.24826025489064) 2 23.0 1014.46722752 (10.276491935146446, 7)
loss 1120.752197265625
Current State,action,reward,Response time,Next State:  (7, 10.276491935146446) 0 25.0 1016.22653355 (10.236991269871366, 5)
loss 1742.799072265625
Current State,action,reward,Response time,Next State:  (5, 10.236991269871366) 3 24.0 1198.16229423 (10.236272697871373, 6)
loss 1713.472412109375
Current State,action,reward,Response time,Next State:  (6, 10.236272697871373) 3 23.0 1074.41679501 (10.369891240151098, 7)
loss 472.055419921875
Current State,action,reward,Response time,Next State:  (7, 10.369891240151098) 3 22.0 1022.04687291 (10.316955310454549, 8)
loss 1990.9014892578125
Current State,action,reward,Response time,Next State:  (8, 10.316955310454549) 3 21.0 974.015818144 (10.333617326102203, 9)
loss 439.3139343261719
Current State,action,reward,Response time,Next State:  (9, 10.333617326102203) 2 21.0 964.03141062 (10.390165524255663, 9)
loss 1256.2490234375
Current State,action,reward,Response time,Next State:  (9, 10.390165524255663) 2 21.0 966.991429728 (10.425974763084863, 9)
loss 1682.7620849609375
Current State,action,reward,Response time,Next State:  (9, 10.425974763084863) 2 21.0 968.865866662 (10.546025383098053, 9)
loss 1776.217041015625
Current State,action,reward,Response time,Next State:  (9, 10.546025383098053) 1 22.0 975.14992417 (10.655373370049301, 8)
loss 1903.9810791015625
Current State,action,reward,Response time,Next State:  (8, 10.655373370049301) 2 22.0 993.794592261 (10.624473674922116, 8)
loss 687.359619140625
Current State,action,reward,Response time,Next State:  (8, 10.624473674922116) 3 21.0 991.988665914 (10.771376986314287, 9)
loss 540.0648803710938
Current State,action,reward,Response time,Next State:  (9, 10.771376986314287) 3 20.0 986.945968488 (10.924797168745895, 10)
loss 566.7642822265625
Current State,action,reward,Response time,Next State:  (10, 10.924797168745895) 3 19.0 957.1563561 (11.039747673816453, 11)
loss 1931.3929443359375
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 2 19.0 951.466016946 (11.271571944085663, 11)
loss 1050.380126953125
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 4 19.0 963.716106332 (11.670334358779868, 11)
loss 1526.6199951171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 3132.72021484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 724.52197265625
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 1 20.0 1012.73322757 (12.501496275411796, 10)
loss 2316.14794921875
Current State,action,reward,Response time,Next State:  (10, 12.501496275411796) 0 22.0 1040.79092857 (13.168618569876575, 8)
loss 650.3793334960938
Current State,action,reward,Response time,Next State:  (8, 13.168618569876575) 3 21.0 1140.68069275 (13.649658108197247, 9)
loss 974.5744018554688
Current State,action,reward,Response time,Next State:  (9, 13.649658108197247) 3 20.0 1137.6097809 (14.283719188889453, 10)
loss 483.13323974609375
Current State,action,reward,Response time,Next State:  (10, 14.283719188889453) 0 22.0 1135.32732476 (14.677479537099185, 8)
loss 462.6947326660156
Current State,action,reward,Response time,Next State:  (8, 14.677479537099185) 3 21.0 1228.86576266 (15.353965082180355, 9)
loss 2569.01025390625
Current State,action,reward,Response time,Next State:  (9, 15.353965082180355) 2 21.0 1226.82184023 (15.836943704090487, 9)
loss 1637.1881103515625
Current State,action,reward,Response time,Next State:  (9, 15.836943704090487) 3 20.0 1252.10338759 (16.466876895473597, 10)
loss 1022.8369140625
Current State,action,reward,Response time,Next State:  (10, 16.466876895473597) 1 21.0 1251.130943 (16.871606159345866, 9)
loss 2165.5830078125
Current State,action,reward,Response time,Next State:  (9, 16.871606159345866) 4 19.0 1306.26286107 (17.534967586021782, 11)
loss 2431.611083984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 1240.6759033203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 710.0913696289062
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 0 21.0 1316.32685758 (18.385807405229915, 9)
loss 1260.8619384765625
Current State,action,reward,Response time,Next State:  (9, 18.385807405229915) 4 19.0 1385.5238237 (18.671267839956315, 11)
loss 1327.9541015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 440.41778564453125
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 1 20.0 1373.60319427 (19.286321916040979, 10)
loss 1044.7362060546875
Current State,action,reward,Response time,Next State:  (10, 19.286321916040979) 2 20.0 1400.68584406 (19.340464848017284, 10)
loss 1060.443115234375
Current State,action,reward,Response time,Next State:  (10, 19.340464848017284) 1 21.0 1403.55780672 (19.213467265587269, 9)
loss 973.6051635742188
Current State,action,reward,Response time,Next State:  (9, 19.213467265587269) 2 21.0 1428.84773289 (19.140765783401285, 9)
loss 955.4114379882812
Current State,action,reward,Response time,Next State:  (9, 19.140765783401285) 4 19.0 1425.04216908 (19.385636054792762, 11)
loss 523.8430786132812
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 1 20.0 1392.48057747 (19.223969507401588, 10)
loss 1578.5184326171875
Current State,action,reward,Response time,Next State:  (10, 19.223969507401588) 3 19.0 1397.37841716 (19.25591252280865, 11)
loss 2364.677490234375
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 0 21.0 1385.62570908 (19.08360399753829, 9)
loss 636.275390625
Current State,action,reward,Response time,Next State:  (9, 19.08360399753829) 3 20.0 1422.05003169 (18.668181536495972, 10)
loss 628.363525390625
Current State,action,reward,Response time,Next State:  (10, 18.668181536495972) 3 19.0 1367.89714889 (18.375894992990247, 11)
loss 821.6995239257812
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 1655.1158447265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 530.5889892578125
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 4 19.0 1278.56065924 (16.84211602880065, 11)
loss 1667.8680419921875
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 2 19.0 1258.07554888 (16.237094554670044, 11)
loss 1002.131103515625
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 1 20.0 1226.10492247 (15.950694610794756, 10)
loss 2120.59326171875
Current State,action,reward,Response time,Next State:  (10, 15.950694610794756) 0 22.0 1223.7505224 (15.828704162850809, 8)
loss 522.2439575195312
Current State,action,reward,Response time,Next State:  (8, 15.828704162850809) 3 21.0 1296.14884991 (15.550833128512703, 9)
loss 555.3219604492188
Current State,action,reward,Response time,Next State:  (9, 15.550833128512703) 3 20.0 1237.12691092 (15.446694946204717, 10)
loss 847.816650390625
Action +2 not possible so Scaled up by 1
Current State,action,reward,Response time,Next State:  (10, 15.446694946204717) 4 19.0 1197.01631782 (15.750501603468638, 11)
loss 441.6055603027344
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 1 20.0 1200.39231205 (15.817158911312735, 10)
loss 459.1363525390625
Current State,action,reward,Response time,Next State:  (10, 15.817158911312735) 2 20.0 1216.66724247 (15.829956988360925, 10)
loss 579.6586303710938
Action +2 not possible so Scaled up by 1
Current State,action,reward,Response time,Next State:  (10, 15.829956988360925) 4 19.0 1217.34610485 (15.892373986997768, 11)
loss 446.0367431640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 484.77008056640625
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 0 21.0 1211.18755114 (16.004586266677634, 9)
loss 471.1885070800781
Current State,action,reward,Response time,Next State:  (9, 16.004586266677634) 4 19.0 1260.87864843 (16.017694914042416, 11)
loss 426.7906188964844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 641.2322998046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 909.1499633789062
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 4 19.0 1219.63501821 (16.147078378791146, 11)
loss 1163.369384765625
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 4 19.0 1221.34827555 (16.229253414601111, 11)
loss 461.4647216796875
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 0 21.0 1225.69057988 (16.295120821876548, 9)
loss 448.17108154296875
Current State,action,reward,Response time,Next State:  (9, 16.295120821876548) 2 21.0 1276.0866986 (16.667936385136993, 9)
loss 565.561279296875
Current State,action,reward,Response time,Next State:  (9, 16.667936385136993) 3 20.0 1295.6017535 (16.836383524612351, 10)
loss 561.0162963867188
Action +2 not possible so Scaled up by 1
Current State,action,reward,Response time,Next State:  (10, 16.836383524612351) 4 19.0 1270.73108663 (16.845818065953559, 11)
loss 567.6879272460938
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 0 21.0 1258.27117243 (17.052961248403161, 9)
loss 614.5162963867188
Current State,action,reward,Response time,Next State:  (9, 17.052961248403161) 2 21.0 1315.75590499 (17.215992726625572, 9)
loss 530.7658081054688
############ Running episode number: 22  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 0 22.0 1029.06659875 (11.973514343585284, 8)
loss 426.76995849609375
Current State,action,reward,Response time,Next State:  (8, 11.973514343585284) 3 21.0 1070.83307124 (11.786394321941378, 9)
loss 622.6673583984375
Current State,action,reward,Response time,Next State:  (9, 11.786394321941378) 4 19.0 1040.0771169 (11.61852219546234, 11)
loss 907.1582641601562
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 1565.7183837890625
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 1 20.0 974.154538113 (11.336751742492702, 10)
loss 448.27484130859375
Current State,action,reward,Response time,Next State:  (10, 11.336751742492702) 3 19.0 979.00811241 (11.25610796929319, 11)
loss 435.0997619628906
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 1 20.0 962.898956888 (11.027107764209074, 10)
loss 437.1859436035156
Current State,action,reward,Response time,Next State:  (10, 11.027107764209074) 1 21.0 962.583328739 (10.995673623987257, 9)
loss 491.122802734375
Current State,action,reward,Response time,Next State:  (9, 10.995673623987257) 3 20.0 998.686790566 (10.931193889570471, 10)
loss 468.5154113769531
Current State,action,reward,Response time,Next State:  (10, 10.931193889570471) 1 21.0 957.495664348 (10.816918347608043, 9)
loss 471.26995849609375
Current State,action,reward,Response time,Next State:  (9, 10.816918347608043) 3 20.0 989.329834005 (10.819208572963639, 10)
loss 1394.710205078125
Current State,action,reward,Response time,Next State:  (10, 10.819208572963639) 3 19.0 951.555504911 (10.768325938188134, 11)
loss 717.4978637695312
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 1120.0345458984375
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 4 19.0 937.318160694 (10.644925616761762, 11)
loss 481.9472351074219
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 0 21.0 930.602776506 (10.58735855349979, 9)
loss 532.1895751953125
Current State,action,reward,Response time,Next State:  (9, 10.58735855349979) 2 21.0 977.313511661 (10.552868829802469, 9)
loss 436.1952209472656
Current State,action,reward,Response time,Next State:  (9, 10.552868829802469) 3 20.0 975.508144832 (10.553846649940214, 10)
loss 468.6391296386719
Action +2 not possible so Scaled up by 1
Current State,action,reward,Response time,Next State:  (10, 10.553846649940214) 4 19.0 937.479622653 (10.489125480251131, 11)
loss 1230.65869140625
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 0 21.0 922.369964659 (10.448897752470936, 9)
loss 460.94024658203125
Current State,action,reward,Response time,Next State:  (9, 10.448897752470936) 3 20.0 970.065772031 (10.433149880183072, 10)
loss 580.3212890625
Current State,action,reward,Response time,Next State:  (10, 10.433149880183072) 2 20.0 931.077372094 (10.44185150623065, 10)
loss 2309.4951171875
Action +2 not possible so Scaled up by 1
Current State,action,reward,Response time,Next State:  (10, 10.44185150623065) 4 19.0 931.538941947 (10.370942817486826, 11)
loss 651.3778686523438
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 633.943359375
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 2 19.0 919.104778396 (10.388469398680568, 11)
loss 903.0316162109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 713.4130249023438
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 0 21.0 914.701547126 (10.319026962956018, 9)
loss 1294.2794189453125
Current State,action,reward,Response time,Next State:  (9, 10.319026962956018) 0 23.0 963.267677113 (10.30224719189987, 7)
loss 1130.337646484375
Current State,action,reward,Response time,Next State:  (7, 10.30224719189987) 2 23.0 1017.83151695 (10.278181486298042, 7)
loss 469.9897155761719
Current State,action,reward,Response time,Next State:  (7, 10.278181486298042) 3 22.0 1016.33182085 (10.268274366284802, 8)
loss 498.2835388183594
Current State,action,reward,Response time,Next State:  (8, 10.268274366284802) 3 21.0 971.170670341 (10.335411397720526, 9)
loss 463.7079162597656
Current State,action,reward,Response time,Next State:  (9, 10.335411397720526) 3 20.0 964.125321415 (10.305649118067803, 10)
loss 1276.0518798828125
Action +2 not possible so Scaled up by 1
Current State,action,reward,Response time,Next State:  (10, 10.305649118067803) 4 19.0 924.314209939 (10.24826025489064, 11)
loss 467.1291198730469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 1606.5228271484375
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 4 19.0 911.133954163 (10.236991269871366, 11)
loss 1342.128662109375
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 1 20.0 909.046654676 (10.236272697871373, 10)
loss 1396.53955078125
Current State,action,reward,Response time,Next State:  (10, 10.236272697871373) 0 22.0 920.634200723 (10.369891240151098, 8)
loss 1251.1434326171875
Current State,action,reward,Response time,Next State:  (8, 10.369891240151098) 4 20.0 977.109647703 (10.316955310454549, 10)
loss 1321.946533203125
Current State,action,reward,Response time,Next State:  (10, 10.316955310454549) 0 22.0 924.913936648 (10.333617326102203, 8)
loss 442.4364318847656
Current State,action,reward,Response time,Next State:  (8, 10.333617326102203) 3 21.0 974.989626232 (10.390165524255663, 9)
loss 436.7603759765625
Current State,action,reward,Response time,Next State:  (9, 10.390165524255663) 2 21.0 966.991429728 (10.425974763084863, 9)
loss 432.6976623535156
Current State,action,reward,Response time,Next State:  (9, 10.425974763084863) 0 23.0 968.865866662 (10.546025383098053, 7)
loss 1187.7027587890625
Current State,action,reward,Response time,Next State:  (7, 10.546025383098053) 1 24.0 1033.02297692 (10.655373370049301, 6)
loss 1449.4912109375
Current State,action,reward,Response time,Next State:  (6, 10.655373370049301) 0 26.0 1102.13573879 (10.624473674922116, 4)
loss 437.9716491699219
Current State,action,reward,Response time,Next State:  (4, 10.624473674922116) 2 26.0 1392.05672091 (10.771376986314287, 4)
loss 1798.6468505859375
Current State,action,reward,Response time,Next State:  (4, 10.771376986314287) 3 25.0 1408.74558199 (10.924797168745895, 5)
loss 719.6432495117188
Current State,action,reward,Response time,Next State:  (5, 10.924797168745895) 4 23.0 1261.35942784 (11.039747673816453, 7)
loss 430.159912109375
Current State,action,reward,Response time,Next State:  (7, 11.039747673816453) 3 22.0 1063.7901361 (11.271571944085663, 8)
loss 1080.8795166015625
Current State,action,reward,Response time,Next State:  (8, 11.271571944085663) 2 22.0 1029.8081916 (11.670334358779868, 8)
loss 518.80029296875
Current State,action,reward,Response time,Next State:  (8, 11.670334358779868) 3 21.0 1053.11377918 (11.819721938468785, 9)
loss 430.1130065917969
Current State,action,reward,Response time,Next State:  (9, 11.819721938468785) 3 20.0 1041.82165315 (12.19918626616789, 10)
loss 682.8746948242188
Current State,action,reward,Response time,Next State:  (10, 12.19918626616789) 3 19.0 1024.75516863 (12.501496275411796, 11)
loss 430.780029296875
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 4 19.0 1028.70793389 (13.168618569876575, 11)
loss 720.2366943359375
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 2 19.0 1063.96010023 (13.649658108197247, 11)
loss 1171.4306640625
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 2 19.0 1089.37925646 (14.283719188889453, 11)
loss 502.1785888671875
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 0 21.0 1122.88439768 (14.677479537099185, 9)
loss 435.88739013671875
Current State,action,reward,Response time,Next State:  (9, 14.677479537099185) 4 19.0 1191.41116041 (15.353965082180355, 11)
loss 472.3484802246094
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 4 19.0 1179.43847566 (15.836943704090487, 11)
loss 1735.828369140625
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 4 19.0 1204.9600972 (16.466876895473597, 11)
loss 1367.0870361328125
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 1 20.0 1238.24711194 (16.871606159345866, 10)
loss 442.3266296386719
Current State,action,reward,Response time,Next State:  (10, 16.871606159345866) 2 20.0 1272.5994393 (17.534967586021782, 10)
loss 859.8945922851562
Current State,action,reward,Response time,Next State:  (10, 17.534967586021782) 3 19.0 1307.78684385 (17.669285735563751, 11)
loss 462.26177978515625
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 4 19.0 1301.78496219 (17.944480812078613, 11)
loss 531.8046264648438
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 2 19.0 1316.32685758 (18.385807405229915, 11)
loss 433.5455017089844
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 2 19.0 1339.64749699 (18.671267839956315, 11)
loss 538.6173095703125
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 1 20.0 1354.73183582 (19.02839494033929, 10)
loss 1036.645751953125
Current State,action,reward,Response time,Next State:  (10, 19.02839494033929) 0 22.0 1387.00434183 (19.286321916040979, 8)
loss 469.62237548828125
Current State,action,reward,Response time,Next State:  (8, 19.286321916040979) 1 23.0 1498.22861069 (19.340464848017284, 7)
loss 634.9383544921875
Current State,action,reward,Response time,Next State:  (7, 19.340464848017284) 2 23.0 1581.06369535 (19.213467265587269, 7)
loss 434.2441101074219
Current State,action,reward,Response time,Next State:  (7, 19.213467265587269) 0 23.373912818 1573.14962117 (19.140765783401285, 5)
loss 431.907470703125
Current State,action,reward,Response time,Next State:  (5, 19.140765783401285) 2 21.1239903788 2016.26087182 (19.385636054792762, 5)
loss 1468.24755859375
Current State,action,reward,Response time,Next State:  (5, 19.385636054792762) 4 23.0 2038.76009621 (19.223969507401588, 7)
loss 1745.4210205078125
Current State,action,reward,Response time,Next State:  (7, 19.223969507401588) 3 22.0 1573.80408654 (19.25591252280865, 8)
loss 436.7893371582031
Current State,action,reward,Response time,Next State:  (8, 19.25591252280865) 2 22.0 1496.45133993 (19.08360399753829, 8)
loss 832.4224243164062
Current State,action,reward,Response time,Next State:  (8, 19.08360399753829) 0 24.0 1486.3808035 (18.668181536495972, 6)
loss 429.1568908691406
Current State,action,reward,Response time,Next State:  (6, 18.668181536495972) 2 24.0 1632.09570747 (18.375894992990247, 6)
loss 653.6063232421875
Current State,action,reward,Response time,Next State:  (6, 18.375894992990247) 2 24.0 1612.76413679 (17.82724819986867, 6)
loss 1000.780517578125
Current State,action,reward,Response time,Next State:  (6, 17.82724819986867) 4 22.0 1576.47712838 (17.229782241685768, 8)
loss 429.9291076660156
Current State,action,reward,Response time,Next State:  (8, 17.229782241685768) 2 22.0 1378.03457101 (16.84211602880065, 8)
loss 1053.5882568359375
Current State,action,reward,Response time,Next State:  (8, 16.84211602880065) 3 21.0 1355.37749867 (16.237094554670044, 9)
loss 509.12493896484375
Current State,action,reward,Response time,Next State:  (9, 16.237094554670044) 3 20.0 1273.04930988 (15.950694610794756, 10)
loss 433.6751403808594
Action +2 not possible so Scaled up by 1
Current State,action,reward,Response time,Next State:  (10, 15.950694610794756) 4 19.0 1223.7505224 (15.828704162850809, 11)
loss 446.0831298828125
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 2 19.0 1204.52470225 (15.550833128512703, 11)
loss 680.049072265625
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 2 19.0 1189.84140354 (15.446694946204717, 11)
loss 715.2078857421875
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 2 19.0 1184.33851965 (15.750501603468638, 11)
loss 1169.2183837890625
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 0 21.0 1200.39231205 (15.817158911312735, 9)
loss 1077.005615234375
Current State,action,reward,Response time,Next State:  (9, 15.817158911312735) 0 23.0 1251.06775133 (15.829956988360925, 7)
loss 851.685791015625
Current State,action,reward,Response time,Next State:  (7, 15.829956988360925) 1 24.0 1362.30032139 (15.892373986997768, 6)
loss 521.8677368164062
Current State,action,reward,Response time,Next State:  (6, 15.892373986997768) 3 23.0 1448.50627772 (15.954793861767499, 7)
loss 1322.471923828125
Current State,action,reward,Response time,Next State:  (7, 15.954793861767499) 2 23.0 1370.07974724 (16.004586266677634, 7)
loss 512.0704345703125
Current State,action,reward,Response time,Next State:  (7, 16.004586266677634) 2 23.0 1373.18264715 (16.017694914042416, 7)
loss 436.2444152832031
Current State,action,reward,Response time,Next State:  (7, 16.017694914042416) 1 24.0 1373.9995352 (15.947547279389703, 6)
loss 429.28485107421875
Current State,action,reward,Response time,Next State:  (6, 15.947547279389703) 2 24.0 1452.15538995 (16.11465619633363, 6)
loss 561.671875
Current State,action,reward,Response time,Next State:  (6, 16.11465619633363) 0 24.0551140495 1463.20782432 (16.147078378791146, 4)
loss 432.0207824707031
Current State,action,reward,Response time,Next State:  (4, 16.147078378791146) 4 24.0 2019.4488595 (16.229253414601111, 6)
loss 500.65692138671875
Current State,action,reward,Response time,Next State:  (6, 16.229253414601111) 2 24.0 1470.78718189 (16.295120821876548, 6)
loss 852.8779907226562
Current State,action,reward,Response time,Next State:  (6, 16.295120821876548) 3 23.0 1475.14359332 (16.667936385136993, 7)
loss 423.29644775390625
Current State,action,reward,Response time,Next State:  (7, 16.667936385136993) 3 22.0 1414.52045804 (16.836383524612351, 8)
loss 551.5258178710938
Current State,action,reward,Response time,Next State:  (8, 16.836383524612351) 2 22.0 1355.04246364 (16.845818065953559, 8)
loss 427.78082275390625
Current State,action,reward,Response time,Next State:  (8, 16.845818065953559) 3 21.0 1355.59386347 (17.052961248403161, 9)
loss 479.9024353027344
Current State,action,reward,Response time,Next State:  (9, 17.052961248403161) 1 22.0 1315.75590499 (17.215992726625572, 8)
loss 438.07452392578125
############ Running episode number: 23  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 1 21.0 1029.06659875 (11.973514343585284, 9)
loss 641.2906494140625
Current State,action,reward,Response time,Next State:  (9, 11.973514343585284) 0 23.0 1049.87192659 (11.786394321941378, 7)
loss 558.2924194335938
Current State,action,reward,Response time,Next State:  (7, 11.786394321941378) 3 22.0 1110.31871442 (11.61852219546234, 8)
loss 731.0629272460938
Current State,action,reward,Response time,Next State:  (8, 11.61852219546234) 0 24.0 1050.08562792 (11.469111876584304, 6)
loss 437.8621826171875
Current State,action,reward,Response time,Next State:  (6, 11.469111876584304) 4 22.0 1155.95567613 (11.336751742492702, 8)
loss 428.2364196777344
Current State,action,reward,Response time,Next State:  (8, 11.336751742492702) 1 23.0 1033.61761156 (11.25610796929319, 7)
loss 455.6675109863281
Current State,action,reward,Response time,Next State:  (7, 11.25610796929319) 3 22.0 1077.27300243 (11.027107764209074, 8)
loss 435.1181945800781
Current State,action,reward,Response time,Next State:  (8, 11.027107764209074) 3 21.0 1015.52053272 (10.995673623987257, 9)
loss 453.693359375
Current State,action,reward,Response time,Next State:  (9, 10.995673623987257) 3 20.0 998.686790566 (10.931193889570471, 10)
loss 536.40673828125
Action +2 not possible so Scaled up by 1
Current State,action,reward,Response time,Next State:  (10, 10.931193889570471) 4 19.0 957.495664348 (10.816918347608043, 11)
loss 649.3197631835938
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 4 19.0 939.691239608 (10.819208572963639, 11)
loss 439.71270751953125
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 1 20.0 939.812260006 (10.768325938188134, 10)
loss 1370.724853515625
Current State,action,reward,Response time,Next State:  (10, 10.768325938188134) 3 19.0 948.856481751 (10.772009508959538, 11)
loss 437.7745056152344
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 0 21.0 937.318160694 (10.644925616761762, 9)
loss 587.9555053710938
Current State,action,reward,Response time,Next State:  (9, 10.644925616761762) 1 22.0 980.32686333 (10.58735855349979, 8)
loss 1261.6099853515625
Current State,action,reward,Response time,Next State:  (8, 10.58735855349979) 1 23.0 989.819480251 (10.552868829802469, 7)
loss 432.24542236328125
Current State,action,reward,Response time,Next State:  (7, 10.552868829802469) 0 25.0 1033.44943815 (10.553846649940214, 5)
loss 521.686767578125
Current State,action,reward,Response time,Next State:  (5, 10.553846649940214) 0 27.0 1227.27567029 (10.489125480251131, 3)
loss 580.0960083007812
Current State,action,reward,Response time,Next State:  (3, 10.489125480251131) 1 -63.0612324246 1963.33252483 (10.448897752470936, 2)
loss 534.9320678710938
Current State,action,reward,Response time,Next State:  (2, 10.448897752470936) 3 27.0 2910.61232425 (10.433149880183072, 3)
loss 537.4464111328125
Current State,action,reward,Response time,Next State:  (3, 10.433149880183072) 3 26.0 1952.32373471 (10.44185150623065, 4)
loss 499.5538635253906
Current State,action,reward,Response time,Next State:  (4, 10.44185150623065) 3 25.0 1371.31004084 (10.370942817486826, 5)
loss 1229.007568359375
Current State,action,reward,Response time,Next State:  (5, 10.370942817486826) 2 25.0 1210.47005993 (10.42733414151318, 5)
loss 692.3651123046875
Current State,action,reward,Response time,Next State:  (5, 10.42733414151318) 3 24.0 1215.65142003 (10.388469398680568, 6)
loss 429.7737121582031
Current State,action,reward,Response time,Next State:  (6, 10.388469398680568) 2 24.0 1084.48294874 (10.344006106602812, 6)
loss 1173.624267578125
Current State,action,reward,Response time,Next State:  (6, 10.344006106602812) 4 22.0 1081.54218635 (10.319026962956018, 8)
loss 812.3255004882812
Current State,action,reward,Response time,Next State:  (8, 10.319026962956018) 3 21.0 974.136895449 (10.30224719189987, 9)
loss 413.5795593261719
Current State,action,reward,Response time,Next State:  (9, 10.30224719189987) 1 22.0 962.389338906 (10.278181486298042, 8)
loss 467.99139404296875
Current State,action,reward,Response time,Next State:  (8, 10.278181486298042) 2 22.0 971.749689939 (10.268274366284802, 8)
loss 425.41497802734375
Current State,action,reward,Response time,Next State:  (8, 10.268274366284802) 4 20.0 971.170670341 (10.335411397720526, 10)
loss 608.1991577148438
Current State,action,reward,Response time,Next State:  (10, 10.335411397720526) 3 19.0 925.892923039 (10.305649118067803, 11)
loss 468.86932373046875
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 4 19.0 912.67468196 (10.24826025489064, 11)
loss 561.1035766601562
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 2 19.0 909.642131904 (10.276491935146446, 11)
loss 476.6866455078125
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 1 20.0 911.133954163 (10.236991269871366, 10)
loss 700.1983032226562
Current State,action,reward,Response time,Next State:  (10, 10.236991269871366) 1 21.0 920.672316722 (10.236272697871373, 9)
loss 565.7404174804688
Current State,action,reward,Response time,Next State:  (9, 10.236272697871373) 1 22.0 958.935899728 (10.369891240151098, 8)
loss 1210.344970703125
Current State,action,reward,Response time,Next State:  (8, 10.369891240151098) 4 20.0 977.109647703 (10.316955310454549, 10)
loss 649.5333251953125
Current State,action,reward,Response time,Next State:  (10, 10.316955310454549) 3 19.0 924.913936648 (10.333617326102203, 11)
loss 464.0401916503906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 469.32476806640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 506.52020263671875
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 4 19.0 919.032945938 (10.546025383098053, 11)
loss 527.3907470703125
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 1 20.0 925.376677007 (10.655373370049301, 10)
loss 608.2601318359375
Current State,action,reward,Response time,Next State:  (10, 10.655373370049301) 3 19.0 942.865015335 (10.624473674922116, 11)
loss 470.3788757324219
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 4 19.0 929.522052234 (10.771376986314287, 11)
loss 434.7079772949219
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 1 20.0 937.284736847 (10.924797168745895, 10)
loss 581.6503295898438
Current State,action,reward,Response time,Next State:  (10, 10.924797168745895) 0 22.0 957.1563561 (11.039747673816453, 8)
loss 471.9649353027344
Current State,action,reward,Response time,Next State:  (8, 11.039747673816453) 0 24.0 1016.25926965 (11.271571944085663, 6)
loss 442.6158752441406
Current State,action,reward,Response time,Next State:  (6, 11.271571944085663) 0 26.0 1142.8905616 (11.670334358779868, 4)
loss 473.84027099609375
Current State,action,reward,Response time,Next State:  (4, 11.670334358779868) 3 25.0 1510.87108593 (11.819721938468785, 5)
loss 445.1463317871094
Current State,action,reward,Response time,Next State:  (5, 11.819721938468785) 3 24.0 1343.58710331 (12.19918626616789, 6)
loss 1207.1109619140625
Current State,action,reward,Response time,Next State:  (6, 12.19918626616789) 1 25.0 1204.24214357 (12.501496275411796, 5)
loss 428.1454772949219
Current State,action,reward,Response time,Next State:  (5, 12.501496275411796) 0 -22.0311726538 1406.23004361 (13.168618569876575, 3)
loss 461.0372619628906
Current State,action,reward,Response time,Next State:  (3, 13.168618569876575) 1 -162.599441396 2490.31172654 (13.649658108197247, 2)
loss 1177.4478759765625
Current State,action,reward,Response time,Next State:  (2, 13.649658108197247) 2 -182.317663736 3905.99441396 (14.283719188889453, 2)
loss 527.4795532226562
Current State,action,reward,Response time,Next State:  (2, 14.283719188889453) 3 -51.7061268861 4103.17663736 (14.677479537099185, 3)
loss 784.5302734375
Current State,action,reward,Response time,Next State:  (3, 14.677479537099185) 3 26.0 2787.06126886 (15.353965082180355, 4)
loss 470.060546875
Current State,action,reward,Response time,Next State:  (4, 15.353965082180355) 1 -74.5094512724 1929.34770724 (15.836943704090487, 3)
loss 796.6303100585938
Current State,action,reward,Response time,Next State:  (3, 15.836943704090487) 1 -250.210159225 3015.09451272 (16.466876895473597, 2)
loss 460.2798767089844
Current State,action,reward,Response time,Next State:  (2, 16.466876895473597) 4 15.8241603947 4782.10159225 (16.871606159345866, 4)
loss 564.1497192382812
Current State,action,reward,Response time,Next State:  (4, 16.871606159345866) 1 -107.904695815 2101.75839605 (17.534967586021782, 3)
loss 1416.8514404296875
Action -2 not possible so Scaled down by 1
Current State,action,reward,Response time,Next State:  (3, 17.534967586021782) 0 -287.60303126 3349.04695815 (17.669285735563751, 2)
loss 486.598388671875
Current State,action,reward,Response time,Next State:  (2, 17.669285735563751) 4 3.63583334771 5156.0303126 (17.944480812078613, 4)
loss 1204.2518310546875
Current State,action,reward,Response time,Next State:  (4, 17.944480812078613) 4 24.0 2223.64166652 (18.385807405229915, 6)
loss 459.4137268066406
Current State,action,reward,Response time,Next State:  (6, 18.385807405229915) 2 24.0 1613.41973487 (18.671267839956315, 6)
loss 468.9589538574219
Current State,action,reward,Response time,Next State:  (6, 18.671267839956315) 1 24.4064010816 1632.29983282 (19.02839494033929, 5)
loss 460.3569641113281
Current State,action,reward,Response time,Next State:  (5, 19.02839494033929) 3 24.0 2005.93598918 (19.286321916040979, 6)
loss 507.5359802246094
Current State,action,reward,Response time,Next State:  (6, 19.286321916040979) 3 23.0 1672.97895956 (19.340464848017284, 7)
loss 772.3507080078125
Current State,action,reward,Response time,Next State:  (7, 19.340464848017284) 1 24.0 1581.06369535 (19.213467265587269, 6)
loss 1195.570068359375
Current State,action,reward,Response time,Next State:  (6, 19.213467265587269) 3 23.0 1668.16041811 (19.140765783401285, 7)
loss 1302.65869140625
Current State,action,reward,Response time,Next State:  (7, 19.140765783401285) 2 23.0 1568.61910246 (19.385636054792762, 7)
loss 476.76934814453125
Current State,action,reward,Response time,Next State:  (7, 19.385636054792762) 0 22.6094185224 1583.87861729 (19.223969507401588, 5)
loss 1256.7843017578125
Current State,action,reward,Response time,Next State:  (5, 19.223969507401588) 3 24.0 2023.90581478 (19.25591252280865, 6)
loss 798.2461547851562
Current State,action,reward,Response time,Next State:  (6, 19.25591252280865) 1 23.8991280022 1670.96770947 (19.08360399753829, 5)
loss 1166.07373046875
Current State,action,reward,Response time,Next State:  (5, 19.08360399753829) 1 -4.58572458629 2011.00871998 (18.668181536495972, 4)
loss 777.5203247070312
Current State,action,reward,Response time,Next State:  (4, 18.668181536495972) 4 24.0 2305.85724586 (18.375894992990247, 6)
loss 428.5370178222656
Current State,action,reward,Response time,Next State:  (6, 18.375894992990247) 3 23.0 1612.76413679 (17.82724819986867, 7)
loss 490.0897216796875
Current State,action,reward,Response time,Next State:  (7, 17.82724819986867) 1 24.0 1486.76498054 (17.229782241685768, 6)
loss 429.4350891113281
Current State,action,reward,Response time,Next State:  (6, 17.229782241685768) 2 24.0 1536.96126404 (16.84211602880065, 6)
loss 418.00830078125
Current State,action,reward,Response time,Next State:  (6, 16.84211602880065) 2 24.0 1511.32136729 (16.237094554670044, 6)
loss 503.7315368652344
Current State,action,reward,Response time,Next State:  (6, 16.237094554670044) 1 25.0 1471.30578788 (15.950694610794756, 5)
loss 730.0409545898438
Current State,action,reward,Response time,Next State:  (5, 15.950694610794756) 3 24.0 1723.15004733 (15.828704162850809, 6)
loss 1884.9251708984375
Current State,action,reward,Response time,Next State:  (6, 15.828704162850809) 3 23.0 1444.29521247 (15.550833128512703, 7)
loss 1402.222900390625
Current State,action,reward,Response time,Next State:  (7, 15.550833128512703) 4 21.0 1344.9062349 (15.446694946204717, 9)
loss 471.603759765625
Current State,action,reward,Response time,Next State:  (9, 15.446694946204717) 1 22.0 1231.67579099 (15.750501603468638, 8)
loss 1208.891357421875
Current State,action,reward,Response time,Next State:  (8, 15.750501603468638) 3 21.0 1291.57831736 (15.817158911312735, 9)
loss 473.3511962890625
Current State,action,reward,Response time,Next State:  (9, 15.817158911312735) 3 20.0 1251.06775133 (15.829956988360925, 10)
loss 1100.8223876953125
Current State,action,reward,Response time,Next State:  (10, 15.829956988360925) 2 20.0 1217.34610485 (15.892373986997768, 10)
loss 445.3879699707031
Current State,action,reward,Response time,Next State:  (10, 15.892373986997768) 3 19.0 1220.65695786 (15.954793861767499, 11)
loss 430.3150939941406
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 4 19.0 1211.18755114 (16.004586266677634, 11)
loss 813.8684692382812
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 0 21.0 1213.81868812 (16.017694914042416, 9)
loss 521.5120239257812
Current State,action,reward,Response time,Next State:  (9, 16.017694914042416) 3 20.0 1261.56482143 (15.947547279389703, 10)
loss 434.52679443359375
Current State,action,reward,Response time,Next State:  (10, 15.947547279389703) 0 22.0 1223.58357506 (16.11465619633363, 8)
loss 792.2783813476562
Current State,action,reward,Response time,Next State:  (8, 16.11465619633363) 0 24.0 1312.86125789 (16.147078378791146, 6)
loss 1282.451904296875
Current State,action,reward,Response time,Next State:  (6, 16.147078378791146) 3 23.0 1465.35219849 (16.229253414601111, 7)
loss 422.9529724121094
Current State,action,reward,Response time,Next State:  (7, 16.229253414601111) 0 25.0 1387.18316937 (16.295120821876548, 5)
loss 436.8604431152344
Current State,action,reward,Response time,Next State:  (5, 16.295120821876548) 3 24.0 1754.79669258 (16.667936385136993, 6)
loss 702.2300415039062
Current State,action,reward,Response time,Next State:  (6, 16.667936385136993) 3 23.0 1499.80128138 (16.836383524612351, 7)
loss 1129.775390625
Current State,action,reward,Response time,Next State:  (7, 16.836383524612351) 4 21.0 1425.01753312 (16.845818065953559, 9)
loss 1257.4434814453125
Current State,action,reward,Response time,Next State:  (9, 16.845818065953559) 3 20.0 1304.91298164 (17.052961248403161, 10)
loss 470.5086669921875
Current State,action,reward,Response time,Next State:  (10, 17.052961248403161) 0 22.0 1282.21925533 (17.215992726625572, 8)
loss 1197.0267333984375
############ Running episode number: 24  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 534.8111572265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 565.9278564453125
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 1 20.0 990.920419923 (11.61852219546234, 10)
loss 493.0824890136719
Current State,action,reward,Response time,Next State:  (10, 11.61852219546234) 3 19.0 993.95437024 (11.469111876584304, 11)
loss 467.63055419921875
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 1 20.0 974.154538113 (11.336751742492702, 10)
loss 450.41558837890625
Current State,action,reward,Response time,Next State:  (10, 11.336751742492702) 0 22.0 979.00811241 (11.25610796929319, 8)
loss 1253.068359375
Current State,action,reward,Response time,Next State:  (8, 11.25610796929319) 0 24.0 1028.90440276 (11.027107764209074, 6)
loss 1093.228271484375
Current State,action,reward,Response time,Next State:  (6, 11.027107764209074) 1 25.0 1126.72191929 (10.995673623987257, 5)
loss 497.2840270996094
Current State,action,reward,Response time,Next State:  (5, 10.995673623987257) 3 24.0 1267.87171396 (10.931193889570471, 6)
loss 498.7801818847656
Current State,action,reward,Response time,Next State:  (6, 10.931193889570471) 3 23.0 1120.37826137 (10.816918347608043, 7)
loss 478.318359375
Current State,action,reward,Response time,Next State:  (7, 10.816918347608043) 1 24.0 1049.90414092 (10.819208572963639, 6)
loss 437.72100830078125
Current State,action,reward,Response time,Next State:  (6, 10.819208572963639) 0 26.0 1112.97165264 (10.768325938188134, 4)
loss 1251.4095458984375
Current State,action,reward,Response time,Next State:  (4, 10.768325938188134) 2 26.0 1408.39896953 (10.772009508959538, 4)
loss 471.89788818359375
Current State,action,reward,Response time,Next State:  (4, 10.772009508959538) 3 25.0 1408.81743934 (10.644925616761762, 5)
loss 450.6581726074219
Current State,action,reward,Response time,Next State:  (5, 10.644925616761762) 0 27.0 1235.64420805 (10.58735855349979, 3)
loss 789.9593505859375
Current State,action,reward,Response time,Next State:  (3, 10.58735855349979) 1 -66.2945562827 1982.65214417 (10.552868829802469, 2)
loss 442.3549499511719
Current State,action,reward,Response time,Next State:  (2, 10.552868829802469) 4 26.0 2942.94556283 (10.553846649940214, 4)
loss 442.4742431640625
Current State,action,reward,Response time,Next State:  (4, 10.553846649940214) 4 24.0 1384.03318082 (10.489125480251131, 6)
loss 559.793212890625
Current State,action,reward,Response time,Next State:  (6, 10.489125480251131) 3 23.0 1091.14025199 (10.448897752470936, 7)
loss 439.2540588378906
Current State,action,reward,Response time,Next State:  (7, 10.448897752470936) 3 22.0 1026.97030049 (10.433149880183072, 8)
loss 1361.239013671875
Current State,action,reward,Response time,Next State:  (8, 10.433149880183072) 3 21.0 980.806785952 (10.44185150623065, 9)
loss 437.3437805175781
Current State,action,reward,Response time,Next State:  (9, 10.44185150623065) 3 20.0 969.696935814 (10.370942817486826, 10)
loss 436.6195373535156
Current State,action,reward,Response time,Next State:  (10, 10.370942817486826) 0 22.0 927.777654938 (10.42733414151318, 8)
loss 1662.1087646484375
Current State,action,reward,Response time,Next State:  (8, 10.42733414151318) 3 21.0 980.466886297 (10.388469398680568, 9)
loss 544.4501342773438
Current State,action,reward,Response time,Next State:  (9, 10.388469398680568) 0 23.0 966.902645924 (10.344006106602812, 7)
loss 692.2167358398438
Current State,action,reward,Response time,Next State:  (7, 10.344006106602812) 1 24.0 1020.43379601 (10.319026962956018, 6)
loss 737.3539428710938
Current State,action,reward,Response time,Next State:  (6, 10.319026962956018) 3 23.0 1079.89008812 (10.30224719189987, 7)
loss 609.761962890625
Current State,action,reward,Response time,Next State:  (7, 10.30224719189987) 3 22.0 1017.83151695 (10.278181486298042, 8)
loss 430.91314697265625
Current State,action,reward,Response time,Next State:  (8, 10.278181486298042) 3 21.0 971.749689939 (10.268274366284802, 9)
loss 1476.10009765625
Current State,action,reward,Response time,Next State:  (9, 10.268274366284802) 3 20.0 960.611029141 (10.335411397720526, 10)
loss 591.4453125
Action +2 not possible so Scaled up by 1
Current State,action,reward,Response time,Next State:  (10, 10.335411397720526) 4 19.0 925.892923039 (10.305649118067803, 11)
loss 485.17022705078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 558.7061767578125
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 4 19.0 909.642131904 (10.276491935146446, 11)
loss 1103.68408203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 801.94482421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 441.73492431640625
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 4 19.0 909.008683798 (10.369891240151098, 11)
loss 1394.02490234375
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 2 19.0 916.069372847 (10.316955310454549, 11)
loss 433.67230224609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 798.3211669921875
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 4 19.0 914.152581784 (10.390165524255663, 11)
loss 566.9043579101562
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 0 21.0 917.140709305 (10.425974763084863, 9)
loss 537.83544921875
Current State,action,reward,Response time,Next State:  (9, 10.425974763084863) 3 20.0 968.865866662 (10.546025383098053, 10)
loss 490.53009033203125
Action +2 not possible so Scaled up by 1
Current State,action,reward,Response time,Next State:  (10, 10.546025383098053) 4 19.0 937.064750655 (10.655373370049301, 11)
loss 432.8367004394531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 548.5253295898438
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 2 19.0 929.522052234 (10.771376986314287, 11)
loss 536.9188232421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 803.153076171875
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 4 19.0 945.391786838 (11.039747673816453, 11)
loss 506.2057189941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 417.2672119140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 460.4150695800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 464.3468322753906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 508.2524108886719
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 4 19.0 1012.73322757 (12.501496275411796, 11)
loss 442.93914794921875
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 0 21.0 1028.70793389 (13.168618569876575, 9)
loss 775.766845703125
Current State,action,reward,Response time,Next State:  (9, 13.168618569876575) 0 23.0 1112.429735 (13.649658108197247, 7)
loss 690.9300537109375
Current State,action,reward,Response time,Next State:  (7, 13.649658108197247) 2 23.0 1226.43122257 (14.283719188889453, 7)
loss 484.71600341796875
Current State,action,reward,Response time,Next State:  (7, 14.283719188889453) 2 23.0 1265.94383637 (14.677479537099185, 7)
loss 470.9932861328125
Current State,action,reward,Response time,Next State:  (7, 14.677479537099185) 0 25.0 1290.48169407 (15.353965082180355, 5)
loss 557.6915893554688
Current State,action,reward,Response time,Next State:  (5, 15.353965082180355) 2 25.0 1668.32121291 (15.836943704090487, 5)
loss 1638.3900146484375
Current State,action,reward,Response time,Next State:  (5, 15.836943704090487) 2 25.0 1712.69836138 (16.466876895473597, 5)
loss 1310.4742431640625
Current State,action,reward,Response time,Next State:  (5, 16.466876895473597) 0 -94.8582850434 1770.57802209 (16.871606159345866, 3)
loss 2009.5802001953125
Current State,action,reward,Response time,Next State:  (3, 16.871606159345866) 3 8.28808337202 3218.58285043 (17.534967586021782, 4)
loss 463.3637390136719
Current State,action,reward,Response time,Next State:  (4, 17.534967586021782) 1 -110.546347383 2177.11916628 (17.669285735563751, 3)
loss 455.5870361328125
Current State,action,reward,Response time,Next State:  (3, 17.669285735563751) 3 3.63583334771 3375.46347383 (17.944480812078613, 4)
loss 446.33740234375
Current State,action,reward,Response time,Next State:  (4, 17.944480812078613) 1 -124.638267193 2223.64166652 (18.385807405229915, 3)
loss 1083.7650146484375
Current State,action,reward,Response time,Next State:  (3, 18.385807405229915) 3 -4.62078634849 3516.38267193 (18.671267839956315, 4)
loss 904.4805908203125
Current State,action,reward,Response time,Next State:  (4, 18.671267839956315) 3 24.4064010816 2306.20786348 (19.02839494033929, 5)
loss 614.8822631835938
Current State,action,reward,Response time,Next State:  (5, 19.02839494033929) 3 24.0 2005.93598918 (19.286321916040979, 6)
loss 643.0010986328125
Current State,action,reward,Response time,Next State:  (6, 19.286321916040979) 0 -12.2231581375 1672.97895956 (19.340464848017284, 4)
loss 550.7332763671875
Current State,action,reward,Response time,Next State:  (4, 19.340464848017284) 2 -10.7804098492 2382.23158138 (19.213467265587269, 4)
loss 674.9461669921875
Current State,action,reward,Response time,Next State:  (4, 19.213467265587269) 4 24.0 2367.80409849 (19.140765783401285, 6)
loss 1273.6380615234375
Current State,action,reward,Response time,Next State:  (6, 19.140765783401285) 3 23.0 1663.35200707 (19.385636054792762, 7)
loss 608.611328125
Current State,action,reward,Response time,Next State:  (7, 19.385636054792762) 3 22.0 1583.87861729 (19.223969507401588, 8)
loss 1617.8916015625
Current State,action,reward,Response time,Next State:  (8, 19.223969507401588) 3 21.0 1494.58443695 (19.25591252280865, 9)
loss 702.6803588867188
Current State,action,reward,Response time,Next State:  (9, 19.25591252280865) 1 22.0 1431.06953264 (19.08360399753829, 8)
loss 453.8428649902344
Current State,action,reward,Response time,Next State:  (8, 19.08360399753829) 4 20.0 1486.3808035 (18.668181536495972, 10)
loss 686.8106689453125
Current State,action,reward,Response time,Next State:  (10, 18.668181536495972) 3 19.0 1367.89714889 (18.375894992990247, 11)
loss 492.8598327636719
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 1 20.0 1339.12370397 (17.82724819986867, 10)
loss 678.0607299804688
Current State,action,reward,Response time,Next State:  (10, 17.82724819986867) 0 22.0 1323.29060362 (17.229782241685768, 8)
loss 429.91961669921875
Current State,action,reward,Response time,Next State:  (8, 17.229782241685768) 2 22.0 1378.03457101 (16.84211602880065, 8)
loss 599.63232421875
Current State,action,reward,Response time,Next State:  (8, 16.84211602880065) 4 20.0 1355.37749867 (16.237094554670044, 10)
loss 1266.7235107421875
Current State,action,reward,Response time,Next State:  (10, 16.237094554670044) 1 21.0 1238.94234737 (15.950694610794756, 9)
loss 481.13555908203125
Current State,action,reward,Response time,Next State:  (9, 15.950694610794756) 1 22.0 1258.0576862 (15.828704162850809, 8)
loss 551.2997436523438
Current State,action,reward,Response time,Next State:  (8, 15.828704162850809) 3 21.0 1296.14884991 (15.550833128512703, 9)
loss 1603.83642578125
Current State,action,reward,Response time,Next State:  (9, 15.550833128512703) 3 20.0 1237.12691092 (15.446694946204717, 10)
loss 775.2498779296875
Current State,action,reward,Response time,Next State:  (10, 15.446694946204717) 0 22.0 1197.01631782 (15.750501603468638, 8)
loss 427.2923583984375
Current State,action,reward,Response time,Next State:  (8, 15.750501603468638) 0 24.0 1291.57831736 (15.817158911312735, 6)
loss 609.7506103515625
Current State,action,reward,Response time,Next State:  (6, 15.817158911312735) 2 24.0 1443.53161985 (15.829956988360925, 6)
loss 556.8323974609375
Current State,action,reward,Response time,Next State:  (6, 15.829956988360925) 3 23.0 1444.37807323 (15.892373986997768, 7)
loss 481.4499206542969
Current State,action,reward,Response time,Next State:  (7, 15.892373986997768) 3 22.0 1366.1899447 (15.954793861767499, 8)
loss 480.67852783203125
Current State,action,reward,Response time,Next State:  (8, 15.954793861767499) 3 21.0 1303.51813652 (16.004586266677634, 9)
loss 898.0819702148438
Current State,action,reward,Response time,Next State:  (9, 16.004586266677634) 0 23.0 1260.87864843 (16.017694914042416, 7)
loss 742.7095336914062
Current State,action,reward,Response time,Next State:  (7, 16.017694914042416) 1 24.0 1373.9995352 (15.947547279389703, 6)
loss 443.3528747558594
Current State,action,reward,Response time,Next State:  (6, 15.947547279389703) 0 24.4234442756 1452.15538995 (16.11465619633363, 4)
loss 500.28167724609375
Current State,action,reward,Response time,Next State:  (4, 16.11465619633363) 3 25.0 2015.76555724 (16.147078378791146, 5)
loss 1275.902587890625
Current State,action,reward,Response time,Next State:  (5, 16.147078378791146) 3 24.0 1741.1942241 (16.229253414601111, 6)
loss 528.454345703125
Current State,action,reward,Response time,Next State:  (6, 16.229253414601111) 1 25.0 1470.78718189 (16.295120821876548, 5)
loss 510.0167236328125
Current State,action,reward,Response time,Next State:  (5, 16.295120821876548) 0 -90.8526865427 1754.79669258 (16.667936385136993, 3)
loss 516.1505737304688
Current State,action,reward,Response time,Next State:  (3, 16.667936385136993) 2 -94.1655571537 3178.52686543 (16.836383524612351, 3)
loss 647.4849853515625
Current State,action,reward,Response time,Next State:  (3, 16.836383524612351) 1 -261.994585806 3211.65557154 (16.845818065953559, 2)
loss 585.9619750976562
Action -2 not possible so Scaled down by 0
Current State,action,reward,Response time,Next State:  (2, 16.845818065953559) 1 -268.436386844 4899.94585806 (17.052961248403161, 2)
loss 520.4916381835938
Current State,action,reward,Response time,Next State:  (2, 17.052961248403161) 2 -273.506388826 4964.36386844 (17.215992726625572, 2)
loss 1149.1712646484375
############ Running episode number: 25  ##############
Action +2 not possible so Scaled up by 1
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 4 19.0 1029.06659875 (11.973514343585284, 11)
loss 1451.34912109375
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 2 19.0 1000.80824137 (11.786394321941378, 11)
loss 1445.260009765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 604.5027465820312
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 2 19.0 982.049698353 (11.469111876584304, 11)
loss 467.4588928222656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 855.1282348632812
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 2 19.0 967.160346038 (11.25610796929319, 11)
loss 2182.376953125
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 4 19.0 962.898956888 (11.027107764209074, 11)
loss 2097.32177734375
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 4 19.0 950.798097136 (10.995673623987257, 11)
loss 1451.2352294921875
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 2 19.0 949.137050055 (10.931193889570471, 11)
loss 1102.4302978515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 1209.3675537109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 627.3377685546875
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 0 21.0 939.812260006 (10.768325938188134, 9)
loss 551.4302368164062
Current State,action,reward,Response time,Next State:  (9, 10.768325938188134) 2 21.0 986.786261176 (10.772009508959538, 9)
loss 470.1347961425781
Current State,action,reward,Response time,Next State:  (9, 10.772009508959538) 3 20.0 986.979077927 (10.644925616761762, 10)
loss 420.6908264160156
Current State,action,reward,Response time,Next State:  (10, 10.644925616761762) 3 19.0 942.310823749 (10.58735855349979, 11)
loss 2106.908935546875
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 1 20.0 927.560809977 (10.552868829802469, 10)
loss 507.977783203125
Action +2 not possible so Scaled up by 1
Current State,action,reward,Response time,Next State:  (10, 10.552868829802469) 4 19.0 937.427755072 (10.553846649940214, 11)
loss 451.9885559082031
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 0 21.0 925.789969445 (10.489125480251131, 9)
loss 1244.2315673828125
Current State,action,reward,Response time,Next State:  (9, 10.489125480251131) 3 20.0 972.171495057 (10.448897752470936, 10)
loss 1124.025146484375
Action +2 not possible so Scaled up by 1
Current State,action,reward,Response time,Next State:  (10, 10.448897752470936) 4 19.0 931.912703681 (10.433149880183072, 11)
loss 1605.6561279296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 936.2059326171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 690.9613647460938
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 2 19.0 916.124940439 (10.42733414151318, 11)
loss 624.3155517578125
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 2 19.0 919.104778396 (10.388469398680568, 11)
loss 603.597900390625
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 2 19.0 917.051082408 (10.344006106602812, 11)
loss 781.6988525390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 1283.4764404296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 1562.8218994140625
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 4 19.0 912.494916918 (10.278181486298042, 11)
loss 1358.4447021484375
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 0 21.0 911.223233653 (10.268274366284802, 9)
loss 1319.086181640625
Current State,action,reward,Response time,Next State:  (9, 10.268274366284802) 3 20.0 960.611029141 (10.335411397720526, 10)
loss 751.4568481445312
Current State,action,reward,Response time,Next State:  (10, 10.335411397720526) 2 20.0 925.892923039 (10.305649118067803, 10)
loss 471.98291015625
Current State,action,reward,Response time,Next State:  (10, 10.305649118067803) 0 22.0 924.314209939 (10.24826025489064, 8)
loss 654.5802612304688
Current State,action,reward,Response time,Next State:  (8, 10.24826025489064) 1 23.0 970.000949704 (10.276491935146446, 7)
loss 1581.4755859375
Current State,action,reward,Response time,Next State:  (7, 10.276491935146446) 0 25.0 1016.22653355 (10.236991269871366, 5)
loss 705.9430541992188
Current State,action,reward,Response time,Next State:  (5, 10.236991269871366) 2 25.0 1198.16229423 (10.236272697871373, 5)
loss 1280.406982421875
Current State,action,reward,Response time,Next State:  (5, 10.236272697871373) 3 24.0 1198.09627024 (10.369891240151098, 6)
loss 436.58660888671875
Current State,action,reward,Response time,Next State:  (6, 10.369891240151098) 4 22.0 1083.25420594 (10.316955310454549, 8)
loss 1346.9676513671875
Current State,action,reward,Response time,Next State:  (8, 10.316955310454549) 1 23.0 974.015818144 (10.333617326102203, 7)
loss 455.2110290527344
Current State,action,reward,Response time,Next State:  (7, 10.333617326102203) 3 22.0 1019.78640117 (10.390165524255663, 8)
loss 1310.770751953125
Current State,action,reward,Response time,Next State:  (8, 10.390165524255663) 3 21.0 978.294574081 (10.425974763084863, 9)
loss 556.42041015625
Current State,action,reward,Response time,Next State:  (9, 10.425974763084863) 2 21.0 968.865866662 (10.546025383098053, 9)
loss 596.6802978515625
Current State,action,reward,Response time,Next State:  (9, 10.546025383098053) 3 20.0 975.14992417 (10.655373370049301, 10)
loss 1226.37841796875
Current State,action,reward,Response time,Next State:  (10, 10.655373370049301) 3 19.0 942.865015335 (10.624473674922116, 11)
loss 627.0599975585938
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 1 20.0 929.522052234 (10.771376986314287, 10)
loss 522.602783203125
Action +2 not possible so Scaled up by 1
Current State,action,reward,Response time,Next State:  (10, 10.771376986314287) 4 19.0 949.018321829 (10.924797168745895, 11)
loss 1163.9971923828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 485.7137145996094
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 4 19.0 951.466016946 (11.271571944085663, 11)
loss 579.7308349609375
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 4 19.0 963.716106332 (11.670334358779868, 11)
loss 911.7500610351562
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 2035.9990234375
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 0 21.0 992.681522335 (12.19918626616789, 9)
loss 2117.627685546875
Current State,action,reward,Response time,Next State:  (9, 12.19918626616789) 0 23.0 1061.68473805 (12.501496275411796, 7)
loss 520.310791015625
Current State,action,reward,Response time,Next State:  (7, 12.501496275411796) 4 21.0 1154.88153049 (13.168618569876575, 9)
loss 549.4973754882812
Current State,action,reward,Response time,Next State:  (9, 13.168618569876575) 1 22.0 1112.429735 (13.649658108197247, 8)
loss 508.8460998535156
Current State,action,reward,Response time,Next State:  (8, 13.649658108197247) 4 20.0 1168.79494995 (14.283719188889453, 10)
loss 440.5981140136719
Current State,action,reward,Response time,Next State:  (10, 14.283719188889453) 2 20.0 1135.32732476 (14.677479537099185, 10)
loss 1166.6314697265625
Current State,action,reward,Response time,Next State:  (10, 14.677479537099185) 2 20.0 1156.21398489 (15.353965082180355, 10)
loss 936.9888305664062
Action +2 not possible so Scaled up by 1
Current State,action,reward,Response time,Next State:  (10, 15.353965082180355) 4 19.0 1192.09754638 (15.836943704090487, 11)
loss 2043.0723876953125
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 1 20.0 1204.9600972 (16.466876895473597, 10)
loss 598.3370361328125
Current State,action,reward,Response time,Next State:  (10, 16.466876895473597) 3 19.0 1251.130943 (16.871606159345866, 11)
loss 559.2335205078125
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 4 19.0 1259.63387034 (17.534967586021782, 11)
loss 430.63043212890625
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 0 21.0 1294.6873044 (17.669285735563751, 9)
loss 573.2328491210938
Current State,action,reward,Response time,Next State:  (9, 17.669285735563751) 2 21.0 1348.01745033 (17.944480812078613, 9)
loss 471.844970703125
Current State,action,reward,Response time,Next State:  (9, 17.944480812078613) 1 22.0 1362.4225545 (18.385807405229915, 8)
loss 513.5892333984375
Current State,action,reward,Response time,Next State:  (8, 18.385807405229915) 0 24.0 1445.59822471 (18.671267839956315, 6)
loss 586.6505737304688
Current State,action,reward,Response time,Next State:  (6, 18.671267839956315) 3 23.0 1632.29983282 (19.02839494033929, 7)
loss 605.1947021484375
Current State,action,reward,Response time,Next State:  (7, 19.02839494033929) 3 22.0 1561.61651886 (19.286321916040979, 8)
loss 520.0296020507812
Current State,action,reward,Response time,Next State:  (8, 19.286321916040979) 3 21.0 1498.22861069 (19.340464848017284, 9)
loss 539.1904296875
Current State,action,reward,Response time,Next State:  (9, 19.340464848017284) 3 20.0 1435.4954296 (19.213467265587269, 10)
loss 602.6124877929688
Action +2 not possible so Scaled up by 1
Current State,action,reward,Response time,Next State:  (10, 19.213467265587269) 4 19.0 1396.82133527 (19.140765783401285, 11)
loss 524.2081298828125
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 1 20.0 1379.54110953 (19.385636054792762, 10)
loss 535.3390502929688
Action +2 not possible so Scaled up by 1
Current State,action,reward,Response time,Next State:  (10, 19.385636054792762) 4 19.0 1405.95387237 (19.223969507401588, 11)
loss 475.4547119140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 906.0051879882812
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 2 19.0 1385.62570908 (19.08360399753829, 11)
loss 1761.0975341796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 641.571533203125
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 2 19.0 1354.56874896 (18.375894992990247, 11)
loss 798.3030395507812
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 0 21.0 1339.12370397 (17.82724819986867, 9)
loss 1593.8714599609375
Current State,action,reward,Response time,Next State:  (9, 17.82724819986867) 1 22.0 1356.28600579 (17.229782241685768, 8)
loss 1980.2496337890625
Current State,action,reward,Response time,Next State:  (8, 17.229782241685768) 4 20.0 1378.03457101 (16.84211602880065, 10)
loss 517.21826171875
Current State,action,reward,Response time,Next State:  (10, 16.84211602880065) 3 19.0 1271.03516211 (16.237094554670044, 11)
loss 482.5411071777344
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 2 19.0 1226.10492247 (15.950694610794756, 11)
loss 2588.918212890625
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 4 19.0 1210.97093797 (15.828704162850809, 11)
loss 667.037841796875
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 2 19.0 1204.52470225 (15.550833128512703, 11)
loss 1227.0972900390625
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 1 20.0 1189.84140354 (15.446694946204717, 10)
loss 556.0432739257812
Action +2 not possible so Scaled up by 1
Current State,action,reward,Response time,Next State:  (10, 15.446694946204717) 4 19.0 1197.01631782 (15.750501603468638, 11)
loss 457.0620422363281
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 4 19.0 1200.39231205 (15.817158911312735, 11)
loss 1625.5279541015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 434.1884765625
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 2 19.0 1204.59090422 (15.892373986997768, 11)
loss 444.792724609375
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 4 19.0 1207.88915169 (15.954793861767499, 11)
loss 1310.148681640625
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 1 20.0 1211.18755114 (16.004586266677634, 10)
loss 1052.5135498046875
Current State,action,reward,Response time,Next State:  (10, 16.004586266677634) 3 19.0 1226.60915635 (16.017694914042416, 11)
loss 1691.824951171875
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 1 20.0 1214.51137704 (15.947547279389703, 10)
loss 624.7555541992188
Current State,action,reward,Response time,Next State:  (10, 15.947547279389703) 1 21.0 1223.58357506 (16.11465619633363, 9)
loss 1746.4810791015625
Current State,action,reward,Response time,Next State:  (9, 16.11465619633363) 2 21.0 1266.64026605 (16.147078378791146, 9)
loss 470.98370361328125
Current State,action,reward,Response time,Next State:  (9, 16.147078378791146) 0 23.0 1268.3374073 (16.229253414601111, 7)
loss 1413.871826171875
Current State,action,reward,Response time,Next State:  (7, 16.229253414601111) 2 23.0 1387.18316937 (16.295120821876548, 7)
loss 1206.839599609375
Current State,action,reward,Response time,Next State:  (7, 16.295120821876548) 3 22.0 1391.28781087 (16.667936385136993, 8)
loss 430.9234924316406
Current State,action,reward,Response time,Next State:  (8, 16.667936385136993) 2 22.0 1345.1976051 (16.836383524612351, 8)
loss 734.158447265625
Current State,action,reward,Response time,Next State:  (8, 16.836383524612351) 2 22.0 1355.04246364 (16.845818065953559, 8)
loss 471.93701171875
Current State,action,reward,Response time,Next State:  (8, 16.845818065953559) 3 21.0 1355.59386347 (17.052961248403161, 9)
loss 444.5823669433594
Current State,action,reward,Response time,Next State:  (9, 17.052961248403161) 3 20.0 1315.75590499 (17.215992726625572, 10)
loss 676.8933715820312
############ Running episode number: 26  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 0 22.0 1029.06659875 (11.973514343585284, 8)
loss 565.5960693359375
Current State,action,reward,Response time,Next State:  (8, 11.973514343585284) 2 22.0 1070.83307124 (11.786394321941378, 8)
loss 583.9317016601562
Current State,action,reward,Response time,Next State:  (8, 11.786394321941378) 3 21.0 1059.89687994 (11.61852219546234, 9)
loss 560.7919921875
Current State,action,reward,Response time,Next State:  (9, 11.61852219546234) 2 21.0 1031.28983953 (11.469111876584304, 9)
loss 1060.4122314453125
Current State,action,reward,Response time,Next State:  (9, 11.469111876584304) 2 21.0 1023.46894667 (11.336751742492702, 9)
loss 1411.202392578125
Current State,action,reward,Response time,Next State:  (9, 11.336751742492702) 2 21.0 1016.54054685 (11.25610796929319, 9)
loss 1151.1885986328125
Current State,action,reward,Response time,Next State:  (9, 11.25610796929319) 1 22.0 1012.3192433 (11.027107764209074, 8)
loss 1184.138916015625
Current State,action,reward,Response time,Next State:  (8, 11.027107764209074) 0 24.0 1015.52053272 (10.995673623987257, 6)
loss 2307.611083984375
Current State,action,reward,Response time,Next State:  (6, 10.995673623987257) 2 24.0 1124.64289336 (10.931193889570471, 6)
loss 737.9951171875
Current State,action,reward,Response time,Next State:  (6, 10.931193889570471) 4 22.0 1120.37826137 (10.816918347608043, 8)
loss 623.9845581054688
Current State,action,reward,Response time,Next State:  (8, 10.816918347608043) 1 23.0 1003.23605536 (10.819208572963639, 7)
loss 778.6661376953125
Current State,action,reward,Response time,Next State:  (7, 10.819208572963639) 4 21.0 1050.04686027 (10.768325938188134, 9)
loss 1331.47265625
Current State,action,reward,Response time,Next State:  (9, 10.768325938188134) 1 22.0 986.786261176 (10.772009508959538, 8)
loss 1514.076904296875
Current State,action,reward,Response time,Next State:  (8, 10.772009508959538) 3 21.0 1000.61136749 (10.644925616761762, 9)
loss 433.18206787109375
Current State,action,reward,Response time,Next State:  (9, 10.644925616761762) 3 20.0 980.32686333 (10.58735855349979, 10)
loss 1874.49365234375
Current State,action,reward,Response time,Next State:  (10, 10.58735855349979) 3 19.0 939.257231149 (10.552868829802469, 11)
loss 503.27398681640625
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 4 19.0 925.738299342 (10.553846649940214, 11)
loss 492.27825927734375
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 2 19.0 925.789969445 (10.489125480251131, 11)
loss 424.7803955078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 2554.275146484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 1196.3004150390625
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 4 19.0 919.412094444 (10.44185150623065, 11)
loss 426.88946533203125
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 1 20.0 919.871906942 (10.370942817486826, 10)
loss 422.31329345703125
Current State,action,reward,Response time,Next State:  (10, 10.370942817486826) 3 19.0 927.777654938 (10.42733414151318, 11)
loss 750.5790405273438
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 1 20.0 919.104778396 (10.388469398680568, 10)
loss 973.0989990234375
Current State,action,reward,Response time,Next State:  (10, 10.388469398680568) 3 19.0 928.707336523 (10.344006106602812, 11)
loss 608.3248901367188
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 482.5791931152344
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 1 20.0 913.381595845 (10.30224719189987, 10)
loss 1052.950439453125
Current State,action,reward,Response time,Next State:  (10, 10.30224719189987) 2 20.0 924.133757854 (10.278181486298042, 10)
loss 484.3435974121094
Current State,action,reward,Response time,Next State:  (10, 10.278181486298042) 1 21.0 922.857214352 (10.268274366284802, 9)
loss 423.6752624511719
Current State,action,reward,Response time,Next State:  (9, 10.268274366284802) 2 21.0 960.611029141 (10.335411397720526, 9)
loss 764.2396850585938
Current State,action,reward,Response time,Next State:  (9, 10.335411397720526) 0 23.0 964.125321415 (10.305649118067803, 7)
loss 568.1548461914062
Current State,action,reward,Response time,Next State:  (7, 10.305649118067803) 0 25.0 1018.04351387 (10.24826025489064, 5)
loss 444.4079895019531
Current State,action,reward,Response time,Next State:  (5, 10.24826025489064) 0 27.0 1199.19771361 (10.276491935146446, 3)
loss 1200.755615234375
Current State,action,reward,Response time,Next State:  (3, 10.276491935146446) 3 26.0 1921.51362397 (10.236991269871366, 4)
loss 1875.7764892578125
Current State,action,reward,Response time,Next State:  (4, 10.236991269871366) 3 25.0 1348.03701865 (10.236272697871373, 5)
loss 1564.235107421875
Current State,action,reward,Response time,Next State:  (5, 10.236272697871373) 0 27.0 1198.09627024 (10.369891240151098, 3)
loss 535.0403442382812
Action -2 not possible so Scaled down by 1
Current State,action,reward,Response time,Next State:  (3, 10.369891240151098) 0 -58.9580466481 1939.88258017 (10.316955310454549, 2)
loss 503.6399230957031
Action -2 not possible so Scaled down by 0
Current State,action,reward,Response time,Next State:  (2, 10.316955310454549) 1 -59.4762070259 2869.58046648 (10.333617326102203, 2)
loss 686.8322143554688
Current State,action,reward,Response time,Next State:  (2, 10.333617326102203) 4 26.0 2874.76207026 (10.390165524255663, 4)
loss 457.744140625
Current State,action,reward,Response time,Next State:  (4, 10.390165524255663) 3 25.0 1365.43828638 (10.425974763084863, 5)
loss 1388.14453125
Current State,action,reward,Response time,Next State:  (5, 10.425974763084863) 3 24.0 1215.52651732 (10.546025383098053, 6)
loss 1865.5771484375
Current State,action,reward,Response time,Next State:  (6, 10.546025383098053) 2 24.0 1094.90356069 (10.655373370049301, 6)
loss 1450.14013671875
Current State,action,reward,Response time,Next State:  (6, 10.655373370049301) 4 22.0 1102.13573879 (10.624473674922116, 8)
loss 427.607177734375
Current State,action,reward,Response time,Next State:  (8, 10.624473674922116) 3 21.0 991.988665914 (10.771376986314287, 9)
loss 587.927490234375
Current State,action,reward,Response time,Next State:  (9, 10.771376986314287) 3 20.0 986.945968488 (10.924797168745895, 10)
loss 1222.3375244140625
Current State,action,reward,Response time,Next State:  (10, 10.924797168745895) 3 19.0 957.1563561 (11.039747673816453, 11)
loss 470.3031921386719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 1125.1607666015625
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 2 19.0 963.716106332 (11.670334358779868, 11)
loss 1151.58544921875
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 1 20.0 984.787563682 (11.819721938468785, 10)
loss 559.2031860351562
Current State,action,reward,Response time,Next State:  (10, 11.819721938468785) 0 22.0 1004.62682792 (12.19918626616789, 8)
loss 518.7637329101562
Current State,action,reward,Response time,Next State:  (8, 12.19918626616789) 4 20.0 1084.02242049 (12.501496275411796, 10)
loss 433.9365234375
Current State,action,reward,Response time,Next State:  (10, 12.501496275411796) 3 19.0 1040.79092857 (13.168618569876575, 11)
loss 794.2479858398438
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 2 19.0 1063.96010023 (13.649658108197247, 11)
loss 415.99755859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 436.2585144042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 1722.0872802734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 490.8533935546875
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 1 20.0 1179.43847566 (15.836943704090487, 10)
loss 1475.465087890625
Current State,action,reward,Response time,Next State:  (10, 15.836943704090487) 3 19.0 1217.71670884 (16.466876895473597, 11)
loss 1479.6884765625
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 0 21.0 1238.24711194 (16.871606159345866, 9)
loss 438.6598815917969
Current State,action,reward,Response time,Next State:  (9, 16.871606159345866) 3 20.0 1306.26286107 (17.534967586021782, 10)
loss 513.5603637695312
Current State,action,reward,Response time,Next State:  (10, 17.534967586021782) 1 21.0 1307.78684385 (17.669285735563751, 9)
loss 556.9736328125
Current State,action,reward,Response time,Next State:  (9, 17.669285735563751) 0 23.0 1348.01745033 (17.944480812078613, 7)
loss 530.6885375976562
Current State,action,reward,Response time,Next State:  (7, 17.944480812078613) 0 25.0 1494.0705337 (18.385807405229915, 5)
loss 598.9270629882812
Current State,action,reward,Response time,Next State:  (5, 18.385807405229915) 0 -130.252452674 1946.89361927 (18.671267839956315, 3)
loss 1322.731201171875
Current State,action,reward,Response time,Next State:  (3, 18.671267839956315) 3 -8.67790698763 3572.52452674 (19.02839494033929, 4)
loss 561.6475830078125
Current State,action,reward,Response time,Next State:  (4, 19.02839494033929) 2 -11.6080706615 2346.77906988 (19.286321916040979, 4)
loss 630.5513916015625
Current State,action,reward,Response time,Next State:  (4, 19.286321916040979) 4 24.0 2376.08070662 (19.340464848017284, 6)
loss 723.5480346679688
Current State,action,reward,Response time,Next State:  (6, 19.340464848017284) 0 -10.7804098492 1676.55992467 (19.213467265587269, 4)
loss 471.37152099609375
Current State,action,reward,Response time,Next State:  (4, 19.213467265587269) 1 -139.486126476 2367.80409849 (19.140765783401285, 3)
loss 1265.1015625
Current State,action,reward,Response time,Next State:  (3, 19.140765783401285) 4 21.1239903788 3664.86126476 (19.385636054792762, 5)
loss 467.7943115234375
Current State,action,reward,Response time,Next State:  (5, 19.385636054792762) 3 24.0 2038.76009621 (19.223969507401588, 6)
loss 582.5333862304688
Current State,action,reward,Response time,Next State:  (6, 19.223969507401588) 3 23.0 1668.85502699 (19.25591252280865, 7)
loss 553.474853515625
Current State,action,reward,Response time,Next State:  (7, 19.25591252280865) 1 24.0 1575.79467084 (19.08360399753829, 6)
loss 1057.5205078125
Current State,action,reward,Response time,Next State:  (6, 19.08360399753829) 3 23.0 1659.57137766 (18.668181536495972, 7)
loss 1907.4010009765625
Current State,action,reward,Response time,Next State:  (7, 18.668181536495972) 1 24.0 1539.16919707 (18.375894992990247, 6)
loss 960.6421508789062
Current State,action,reward,Response time,Next State:  (6, 18.375894992990247) 2 24.0 1612.76413679 (17.82724819986867, 6)
loss 2163.25927734375
Current State,action,reward,Response time,Next State:  (6, 17.82724819986867) 1 25.0 1576.47712838 (17.229782241685768, 5)
loss 697.796875
Current State,action,reward,Response time,Next State:  (5, 17.229782241685768) 3 24.0 1840.67545971 (16.84211602880065, 6)
loss 1332.3389892578125
Current State,action,reward,Response time,Next State:  (6, 16.84211602880065) 3 23.0 1511.32136729 (16.237094554670044, 7)
loss 1201.3359375
Current State,action,reward,Response time,Next State:  (7, 16.237094554670044) 3 22.0 1387.67180358 (15.950694610794756, 8)
loss 714.525390625
Current State,action,reward,Response time,Next State:  (8, 15.950694610794756) 2 22.0 1303.27855664 (15.828704162850809, 8)
loss 456.89697265625
Current State,action,reward,Response time,Next State:  (8, 15.828704162850809) 2 22.0 1296.14884991 (15.550833128512703, 8)
loss 647.2979125976562
Current State,action,reward,Response time,Next State:  (8, 15.550833128512703) 0 24.0 1279.90873428 (15.446694946204717, 6)
loss 1118.3758544921875
Current State,action,reward,Response time,Next State:  (6, 15.446694946204717) 0 26.0 1419.0294644 (15.750501603468638, 4)
loss 600.7234497070312
Current State,action,reward,Response time,Next State:  (4, 15.750501603468638) 4 24.0 1974.39599686 (15.817158911312735, 6)
loss 1395.7052001953125
Current State,action,reward,Response time,Next State:  (6, 15.817158911312735) 0 26.0 1443.53161985 (15.829956988360925, 4)
loss 1454.3011474609375
Current State,action,reward,Response time,Next State:  (4, 15.829956988360925) 1 -75.5996054643 1983.42247739 (15.892373986997768, 3)
loss 666.3348388671875
Current State,action,reward,Response time,Next State:  (3, 15.892373986997768) 2 -76.827224819 3025.99605464 (15.954793861767499, 3)
loss 472.4564514160156
Current State,action,reward,Response time,Next State:  (3, 15.954793861767499) 1 -235.833706102 3038.27224819 (16.004586266677634, 2)
loss 556.2681274414062
Current State,action,reward,Response time,Next State:  (2, 16.004586266677634) 4 25.5249670084 4638.33706102 (16.017694914042416, 4)
loss 1441.250244140625
Current State,action,reward,Response time,Next State:  (4, 16.017694914042416) 1 -76.6847053921 2004.75032992 (15.947547279389703, 3)
loss 1073.0430908203125
Current State,action,reward,Response time,Next State:  (3, 15.947547279389703) 4 25.0 3036.84705392 (16.11465619633363, 5)
loss 855.3109741210938
Current State,action,reward,Response time,Next State:  (5, 16.11465619633363) 0 -80.6089080663 1738.21520197 (16.147078378791146, 3)
loss 1326.9454345703125
Action -2 not possible so Scaled down by 1
Current State,action,reward,Response time,Next State:  (3, 16.147078378791146) 0 -242.820472693 3076.08908066 (16.229253414601111, 2)
loss 1155.4808349609375
Current State,action,reward,Response time,Next State:  (2, 16.229253414601111) 4 22.3732869054 4708.20472693 (16.295120821876548, 4)
loss 1348.1370849609375
Current State,action,reward,Response time,Next State:  (4, 16.295120821876548) 0 -256.462767754 2036.26713095 (16.667936385136993, 2)
loss 1309.313232421875
Current State,action,reward,Response time,Next State:  (2, 16.667936385136993) 3 -94.1655571537 4844.62767754 (16.836383524612351, 3)
loss 1547.435302734375
Current State,action,reward,Response time,Next State:  (3, 16.836383524612351) 3 16.1171244566 3211.65557154 (16.845818065953559, 4)
loss 1167.94580078125
Current State,action,reward,Response time,Next State:  (4, 16.845818065953559) 3 25.0 2098.82875543 (17.052961248403161, 5)
loss 1259.7410888671875
Current State,action,reward,Response time,Next State:  (5, 17.052961248403161) 4 23.0 1824.42875413 (17.215992726625572, 7)
loss 1839.5506591796875
############ Running episode number: 27  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 782.27587890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 866.5327758789062
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 2 19.0 990.920419923 (11.61852219546234, 11)
loss 1437.8270263671875
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 2 19.0 982.049698353 (11.469111876584304, 11)
loss 1309.127197265625
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 2 19.0 974.154538113 (11.336751742492702, 11)
loss 587.3831787109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 481.0224304199219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 517.4905395507812
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 625.3945922851562
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 4 19.0 949.137050055 (10.931193889570471, 11)
loss 1024.0223388671875
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 4 19.0 945.729803224 (10.816918347608043, 11)
loss 1277.020263671875
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 2 19.0 939.691239608 (10.819208572963639, 11)
loss 640.0892333984375
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 0 21.0 939.812260006 (10.768325938188134, 9)
loss 612.1543579101562
Current State,action,reward,Response time,Next State:  (9, 10.768325938188134) 3 20.0 986.786261176 (10.772009508959538, 10)
loss 1490.1375732421875
Current State,action,reward,Response time,Next State:  (10, 10.772009508959538) 2 20.0 949.051873418 (10.644925616761762, 10)
loss 434.9742126464844
Action +2 not possible so Scaled up by 1
Current State,action,reward,Response time,Next State:  (10, 10.644925616761762) 4 19.0 942.310823749 (10.58735855349979, 11)
loss 1173.9781494140625
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 2 19.0 927.560809977 (10.552868829802469, 11)
loss 1020.6362915039062
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 2 19.0 925.738299342 (10.553846649940214, 11)
loss 2790.385986328125
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 4 19.0 925.789969445 (10.489125480251131, 11)
loss 2268.19775390625
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 0 21.0 922.369964659 (10.448897752470936, 9)
loss 524.9985961914062
Current State,action,reward,Response time,Next State:  (9, 10.448897752470936) 3 20.0 970.065772031 (10.433149880183072, 10)
loss 2673.134033203125
Current State,action,reward,Response time,Next State:  (10, 10.433149880183072) 2 20.0 931.077372094 (10.44185150623065, 10)
loss 1405.3892822265625
Current State,action,reward,Response time,Next State:  (10, 10.44185150623065) 2 20.0 931.538941947 (10.370942817486826, 10)
loss 614.5833740234375
Current State,action,reward,Response time,Next State:  (10, 10.370942817486826) 3 19.0 927.777654938 (10.42733414151318, 11)
loss 1796.0181884765625
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 4 19.0 919.104778396 (10.388469398680568, 11)
loss 620.5927124023438
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 2 19.0 917.051082408 (10.344006106602812, 11)
loss 569.3419799804688
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 4 19.0 914.701547126 (10.319026962956018, 11)
loss 717.3980102539062
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 4 19.0 913.381595845 (10.30224719189987, 11)
loss 614.7454223632812
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 4 19.0 912.494916918 (10.278181486298042, 11)
loss 1380.3404541015625
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 2 19.0 911.223233653 (10.268274366284802, 11)
loss 1911.075439453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 705.6595458984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 1297.9393310546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 1445.7559814453125
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 0 21.0 909.642131904 (10.276491935146446, 9)
loss 2138.046142578125
Current State,action,reward,Response time,Next State:  (9, 10.276491935146446) 3 20.0 961.041178317 (10.236991269871366, 10)
loss 921.347412109375
Current State,action,reward,Response time,Next State:  (10, 10.236991269871366) 3 19.0 920.672316722 (10.236272697871373, 11)
loss 763.23681640625
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 0 21.0 909.008683798 (10.369891240151098, 9)
loss 2175.9794921875
Current State,action,reward,Response time,Next State:  (9, 10.369891240151098) 2 21.0 965.930171009 (10.316955310454549, 9)
loss 1378.258544921875
Current State,action,reward,Response time,Next State:  (9, 10.316955310454549) 4 19.0 963.159236328 (10.333617326102203, 11)
loss 1000.0269165039062
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 4 19.0 914.152581784 (10.390165524255663, 11)
loss 2607.840576171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 1273.66845703125
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 4 19.0 919.032945938 (10.546025383098053, 11)
loss 598.684326171875
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 2 19.0 925.376677007 (10.655373370049301, 11)
loss 1417.51025390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 663.09326171875
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 0 21.0 929.522052234 (10.771376986314287, 9)
loss 2365.345947265625
Current State,action,reward,Response time,Next State:  (9, 10.771376986314287) 3 20.0 986.945968488 (10.924797168745895, 10)
loss 2196.53125
Action +2 not possible so Scaled up by 1
Current State,action,reward,Response time,Next State:  (10, 10.924797168745895) 4 19.0 957.1563561 (11.039747673816453, 11)
loss 1586.72216796875
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 0 21.0 951.466016946 (11.271571944085663, 9)
loss 1570.9730224609375
Current State,action,reward,Response time,Next State:  (9, 11.271571944085663) 1 22.0 1013.12870607 (11.670334358779868, 8)
loss 494.1395568847656
Current State,action,reward,Response time,Next State:  (8, 11.670334358779868) 2 22.0 1053.11377918 (11.819721938468785, 8)
loss 1312.409423828125
Current State,action,reward,Response time,Next State:  (8, 11.819721938468785) 3 21.0 1061.84470565 (12.19918626616789, 9)
loss 660.0032348632812
Current State,action,reward,Response time,Next State:  (9, 12.19918626616789) 1 22.0 1061.68473805 (12.501496275411796, 8)
loss 2249.03466796875
Current State,action,reward,Response time,Next State:  (8, 12.501496275411796) 1 23.0 1101.69086701 (13.168618569876575, 7)
loss 1205.3697509765625
Current State,action,reward,Response time,Next State:  (7, 13.168618569876575) 3 22.0 1196.45441106 (13.649658108197247, 8)
loss 834.8538208007812
Current State,action,reward,Response time,Next State:  (8, 13.649658108197247) 1 23.0 1168.79494995 (14.283719188889453, 7)
loss 1500.650390625
Current State,action,reward,Response time,Next State:  (7, 14.283719188889453) 3 22.0 1265.94383637 (14.677479537099185, 8)
loss 530.1044311523438
Current State,action,reward,Response time,Next State:  (8, 14.677479537099185) 4 20.0 1228.86576266 (15.353965082180355, 10)
loss 2530.775390625
Current State,action,reward,Response time,Next State:  (10, 15.353965082180355) 3 19.0 1192.09754638 (15.836943704090487, 11)
loss 1231.2325439453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 1359.61083984375
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 0 21.0 1238.24711194 (16.871606159345866, 9)
loss 2067.5966796875
Current State,action,reward,Response time,Next State:  (9, 16.871606159345866) 4 19.0 1306.26286107 (17.534967586021782, 11)
loss 462.3232421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 629.382080078125
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 4 19.0 1301.78496219 (17.944480812078613, 11)
loss 1290.39794921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 1512.892333984375
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 4 19.0 1339.64749699 (18.671267839956315, 11)
loss 1048.4288330078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 1146.2147216796875
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 4 19.0 1373.60319427 (19.286321916040979, 11)
loss 709.9248046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 789.9161376953125
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 0 21.0 1390.09363446 (19.213467265587269, 9)
loss 1263.4393310546875
Current State,action,reward,Response time,Next State:  (9, 19.213467265587269) 1 22.0 1428.84773289 (19.140765783401285, 8)
loss 529.6105346679688
Current State,action,reward,Response time,Next State:  (8, 19.140765783401285) 3 21.0 1489.72161235 (19.385636054792762, 9)
loss 2218.437744140625
Current State,action,reward,Response time,Next State:  (9, 19.385636054792762) 3 20.0 1437.85991935 (19.223969507401588, 10)
loss 817.3258056640625
Current State,action,reward,Response time,Next State:  (10, 19.223969507401588) 2 20.0 1397.37841716 (19.25591252280865, 10)
loss 643.8386840820312
Current State,action,reward,Response time,Next State:  (10, 19.25591252280865) 3 19.0 1399.0728054 (19.08360399753829, 11)
loss 2231.973876953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 1374.0391845703125
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 0 21.0 1354.56874896 (18.375894992990247, 9)
loss 1431.907470703125
Current State,action,reward,Response time,Next State:  (9, 18.375894992990247) 3 20.0 1385.00495784 (17.82724819986867, 10)
loss 2494.81396484375
Action +2 not possible so Scaled up by 1
Current State,action,reward,Response time,Next State:  (10, 17.82724819986867) 4 19.0 1323.29060362 (17.229782241685768, 11)
loss 1362.6708984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 666.5379638671875
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 4 19.0 1258.07554888 (16.237094554670044, 11)
loss 1216.0101318359375
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 1 20.0 1226.10492247 (15.950694610794756, 10)
loss 1477.50146484375
Current State,action,reward,Response time,Next State:  (10, 15.950694610794756) 2 20.0 1223.7505224 (15.828704162850809, 10)
loss 967.0441284179688
Action +2 not possible so Scaled up by 1
Current State,action,reward,Response time,Next State:  (10, 15.828704162850809) 4 19.0 1217.27964986 (15.550833128512703, 11)
loss 608.1089477539062
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 2129.128173828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 1205.3653564453125
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 4 19.0 1200.39231205 (15.817158911312735, 11)
loss 551.1470947265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 2903.520751953125
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 2 19.0 1204.59090422 (15.892373986997768, 11)
loss 604.1280517578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 843.878662109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 1887.42822265625
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 0 21.0 1213.81868812 (16.017694914042416, 9)
loss 2093.369140625
Current State,action,reward,Response time,Next State:  (9, 16.017694914042416) 4 19.0 1261.56482143 (15.947547279389703, 11)
loss 740.7869262695312
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 668.04638671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 2091.024169921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 881.3588256835938
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 4 19.0 1225.69057988 (16.295120821876548, 11)
loss 484.142578125
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 2 19.0 1229.17115431 (16.667936385136993, 11)
loss 1185.93115234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 606.9832153320312
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 566.7530517578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 1278.06982421875
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 1 20.0 1269.21706044 (17.215992726625572, 10)
loss 762.49658203125
############ Running episode number: 28  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 650.77392578125
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 0 21.0 1000.80824137 (11.786394321941378, 9)
loss 456.5381164550781
Current State,action,reward,Response time,Next State:  (9, 11.786394321941378) 2 21.0 1040.0771169 (11.61852219546234, 9)
loss 2085.672119140625
Current State,action,reward,Response time,Next State:  (9, 11.61852219546234) 2 21.0 1031.28983953 (11.469111876584304, 9)
loss 633.8802490234375
Current State,action,reward,Response time,Next State:  (9, 11.469111876584304) 0 23.0 1023.46894667 (11.336751742492702, 7)
loss 450.7337341308594
Current State,action,reward,Response time,Next State:  (7, 11.336751742492702) 3 22.0 1082.29845875 (11.25610796929319, 8)
loss 1392.2547607421875
Current State,action,reward,Response time,Next State:  (8, 11.25610796929319) 0 24.0 1028.90440276 (11.027107764209074, 6)
loss 1450.1396484375
Current State,action,reward,Response time,Next State:  (6, 11.027107764209074) 0 26.0 1126.72191929 (10.995673623987257, 4)
loss 616.619140625
Current State,action,reward,Response time,Next State:  (4, 10.995673623987257) 0 -78.0598228672 1434.22666469 (10.931193889570471, 2)
loss 1412.9403076171875
Current State,action,reward,Response time,Next State:  (2, 10.931193889570471) 3 24.2200048766 3060.59822867 (10.816918347608043, 3)
loss 1430.40673828125
Current State,action,reward,Response time,Next State:  (3, 10.816918347608043) 3 26.0 2027.79995123 (10.819208572963639, 4)
loss 428.15582275390625
Current State,action,reward,Response time,Next State:  (4, 10.819208572963639) 3 25.0 1414.17946021 (10.768325938188134, 5)
loss 1851.190673828125
Current State,action,reward,Response time,Next State:  (5, 10.768325938188134) 0 25.1032325301 1246.98250365 (10.772009508959538, 3)
loss 929.4411010742188
Current State,action,reward,Response time,Next State:  (3, 10.772009508959538) 3 26.0 2018.9676747 (10.644925616761762, 4)
loss 470.4277038574219
Current State,action,reward,Response time,Next State:  (4, 10.644925616761762) 2 26.0 1394.38015129 (10.58735855349979, 4)
loss 491.2958068847656
Current State,action,reward,Response time,Next State:  (4, 10.58735855349979) 0 -66.2945562827 1387.84028018 (10.552868829802469, 2)
loss 2981.2392578125
Current State,action,reward,Response time,Next State:  (2, 10.552868829802469) 2 -66.3249648277 2942.94556283 (10.553846649940214, 2)
loss 1097.035888671875
Current State,action,reward,Response time,Next State:  (2, 10.553846649940214) 3 27.0 2943.24964828 (10.489125480251131, 3)
loss 1587.3035888671875
Current State,action,reward,Response time,Next State:  (3, 10.489125480251131) 3 26.0 1963.33252483 (10.448897752470936, 4)
loss 1210.0042724609375
Current State,action,reward,Response time,Next State:  (4, 10.448897752470936) 3 25.0 1372.11052534 (10.433149880183072, 5)
loss 554.9321899414062
Current State,action,reward,Response time,Next State:  (5, 10.433149880183072) 2 25.0 1216.18578302 (10.44185150623065, 5)
loss 2249.5078125
Current State,action,reward,Response time,Next State:  (5, 10.44185150623065) 4 23.0 1216.98530774 (10.370942817486826, 7)
loss 1753.85498046875
Current State,action,reward,Response time,Next State:  (7, 10.370942817486826) 1 24.0 1022.11240377 (10.42733414151318, 6)
loss 1199.25830078125
Current State,action,reward,Response time,Next State:  (6, 10.42733414151318) 3 23.0 1087.05342808 (10.388469398680568, 7)
loss 1607.9102783203125
Current State,action,reward,Response time,Next State:  (7, 10.388469398680568) 3 22.0 1023.20460302 (10.344006106602812, 8)
loss 454.82794189453125
Current State,action,reward,Response time,Next State:  (8, 10.344006106602812) 0 24.0 975.596796379 (10.319026962956018, 6)
loss 2061.381591796875
Current State,action,reward,Response time,Next State:  (6, 10.319026962956018) 3 23.0 1079.89008812 (10.30224719189987, 7)
loss 1090.281494140625
Current State,action,reward,Response time,Next State:  (7, 10.30224719189987) 1 24.0 1017.83151695 (10.278181486298042, 6)
loss 557.4513549804688
Current State,action,reward,Response time,Next State:  (6, 10.278181486298042) 2 24.0 1077.18860481 (10.268274366284802, 6)
loss 1303.677490234375
Current State,action,reward,Response time,Next State:  (6, 10.268274366284802) 1 25.0 1076.53335675 (10.335411397720526, 5)
loss 1941.9366455078125
Current State,action,reward,Response time,Next State:  (5, 10.335411397720526) 2 25.0 1207.20535417 (10.305649118067803, 5)
loss 747.4907836914062
Current State,action,reward,Response time,Next State:  (5, 10.305649118067803) 1 26.0 1204.47072981 (10.24826025489064, 4)
loss 701.7479248046875
Current State,action,reward,Response time,Next State:  (4, 10.24826025489064) 3 25.0 1349.31722482 (10.276491935146446, 5)
loss 709.52880859375
Current State,action,reward,Response time,Next State:  (5, 10.276491935146446) 4 23.0 1201.79170309 (10.236991269871366, 7)
loss 780.7794189453125
Current State,action,reward,Response time,Next State:  (7, 10.236991269871366) 4 21.0 1013.76498121 (10.236272697871373, 9)
loss 1356.11474609375
Current State,action,reward,Response time,Next State:  (9, 10.236272697871373) 4 19.0 958.935899728 (10.369891240151098, 11)
loss 1173.0224609375
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 1 20.0 916.069372847 (10.316955310454549, 10)
loss 882.2345581054688
Current State,action,reward,Response time,Next State:  (10, 10.316955310454549) 0 22.0 924.913936648 (10.333617326102203, 8)
loss 2232.503173828125
Current State,action,reward,Response time,Next State:  (8, 10.333617326102203) 0 24.0 974.989626232 (10.390165524255663, 6)
loss 531.3179321289062
Current State,action,reward,Response time,Next State:  (6, 10.390165524255663) 1 25.0 1084.59512897 (10.425974763084863, 5)
loss 670.5115356445312
Current State,action,reward,Response time,Next State:  (5, 10.425974763084863) 3 24.0 1215.52651732 (10.546025383098053, 6)
loss 876.70068359375
Current State,action,reward,Response time,Next State:  (6, 10.546025383098053) 4 22.0 1094.90356069 (10.655373370049301, 8)
loss 644.7507934570312
Current State,action,reward,Response time,Next State:  (8, 10.655373370049301) 3 21.0 993.794592261 (10.624473674922116, 9)
loss 558.1175537109375
Current State,action,reward,Response time,Next State:  (9, 10.624473674922116) 3 20.0 979.256305105 (10.771376986314287, 10)
loss 662.01611328125
Current State,action,reward,Response time,Next State:  (10, 10.771376986314287) 0 22.0 949.018321829 (10.924797168745895, 8)
loss 1480.3194580078125
Current State,action,reward,Response time,Next State:  (8, 10.924797168745895) 1 23.0 1009.54101094 (11.039747673816453, 7)
loss 493.4076232910156
Current State,action,reward,Response time,Next State:  (7, 11.039747673816453) 1 24.0 1063.7901361 (11.271571944085663, 6)
loss 1066.19384765625
Current State,action,reward,Response time,Next State:  (6, 11.271571944085663) 3 23.0 1142.8905616 (11.670334358779868, 7)
loss 1704.52783203125
Current State,action,reward,Response time,Next State:  (7, 11.670334358779868) 0 25.0 1103.08623692 (11.819721938468785, 5)
loss 1208.6986083984375
Current State,action,reward,Response time,Next State:  (5, 11.819721938468785) 2 25.0 1343.58710331 (12.19918626616789, 5)
loss 1038.737060546875
Current State,action,reward,Response time,Next State:  (5, 12.19918626616789) 0 -8.91079643257 1378.45312853 (12.501496275411796, 3)
loss 500.1732482910156
Current State,action,reward,Response time,Next State:  (3, 12.501496275411796) 1 -147.63992904 2359.10796433 (13.168618569876575, 2)
loss 490.8937072753906
Current State,action,reward,Response time,Next State:  (2, 13.168618569876575) 4 26.0 3756.3992904 (13.649658108197247, 4)
loss 441.1231689453125
Current State,action,reward,Response time,Next State:  (4, 13.649658108197247) 2 26.0 1735.73095309 (14.283719188889453, 4)
loss 1434.2705078125
Current State,action,reward,Response time,Next State:  (4, 14.283719188889453) 3 25.0 1807.76307539 (14.677479537099185, 5)
loss 1854.96923828125
Current State,action,reward,Response time,Next State:  (5, 14.677479537099185) 3 24.0 1606.16421855 (15.353965082180355, 6)
loss 1071.530517578125
Current State,action,reward,Response time,Next State:  (6, 15.353965082180355) 2 24.0 1412.89639409 (15.836943704090487, 6)
loss 1830.630126953125
Current State,action,reward,Response time,Next State:  (6, 15.836943704090487) 1 25.0 1444.84016836 (16.466876895473597, 5)
loss 1167.4603271484375
Current State,action,reward,Response time,Next State:  (5, 16.466876895473597) 0 -94.8582850434 1770.57802209 (16.871606159345866, 3)
loss 1268.60595703125
Current State,action,reward,Response time,Next State:  (3, 16.871606159345866) 3 8.28808337202 3218.58285043 (17.534967586021782, 4)
loss 1120.47314453125
Current State,action,reward,Response time,Next State:  (4, 17.534967586021782) 3 25.0 2177.11916628 (17.669285735563751, 5)
loss 1339.9261474609375
Current State,action,reward,Response time,Next State:  (5, 17.669285735563751) 1 3.63583334771 1881.05801687 (17.944480812078613, 4)
loss 1068.3443603515625
Current State,action,reward,Response time,Next State:  (4, 17.944480812078613) 2 -1.37783049286 2223.64166652 (18.385807405229915, 4)
loss 2072.334716796875
Current State,action,reward,Response time,Next State:  (4, 18.385807405229915) 0 -318.762972339 2273.77830493 (18.671267839956315, 2)
loss 1828.52734375
Action -2 not possible so Scaled down by 0
Current State,action,reward,Response time,Next State:  (2, 18.671267839956315) 0 -329.869018402 5467.62972339 (19.02839494033929, 2)
loss 596.2457885742188
Action -2 not possible so Scaled down by 0
Current State,action,reward,Response time,Next State:  (2, 19.02839494033929) 1 -337.890109128 5578.69018402 (19.286321916040979, 2)
loss 511.02752685546875
Current State,action,reward,Response time,Next State:  (2, 19.286321916040979) 3 -143.413632543 5658.90109128 (19.340464848017284, 3)
loss 1620.5760498046875
Current State,action,reward,Response time,Next State:  (3, 19.340464848017284) 4 22.7059154527 3704.13632543 (19.213467265587269, 5)
loss 1003.618408203125
Current State,action,reward,Response time,Next State:  (5, 19.213467265587269) 1 -9.95448909562 2022.94084547 (19.140765783401285, 4)
loss 575.6339721679688
Current State,action,reward,Response time,Next State:  (4, 19.140765783401285) 0 -340.978610108 2359.54489096 (19.385636054792762, 2)
loss 1514.5145263671875
Current State,action,reward,Response time,Next State:  (2, 19.385636054792762) 4 -10.8997199267 5689.78610108 (19.223969507401588, 4)
loss 626.0947875976562
Current State,action,reward,Response time,Next State:  (4, 19.223969507401588) 4 24.0 2368.99719927 (19.25591252280865, 6)
loss 2360.107177734375
Current State,action,reward,Response time,Next State:  (6, 19.25591252280865) 4 22.0 1670.96770947 (19.08360399753829, 8)
loss 1407.15771484375
Current State,action,reward,Response time,Next State:  (8, 19.08360399753829) 4 20.0 1486.3808035 (18.668181536495972, 10)
loss 1225.3275146484375
Current State,action,reward,Response time,Next State:  (10, 18.668181536495972) 2 20.0 1367.89714889 (18.375894992990247, 10)
loss 603.268798828125
Current State,action,reward,Response time,Next State:  (10, 18.375894992990247) 1 21.0 1352.39307459 (17.82724819986867, 9)
loss 705.6287231445312
Current State,action,reward,Response time,Next State:  (9, 17.82724819986867) 0 23.0 1356.28600579 (17.229782241685768, 7)
loss 1368.3538818359375
Current State,action,reward,Response time,Next State:  (7, 17.229782241685768) 3 22.0 1449.53285514 (16.84211602880065, 8)
loss 2323.60400390625
Current State,action,reward,Response time,Next State:  (8, 16.84211602880065) 2 22.0 1355.37749867 (16.237094554670044, 8)
loss 1796.42138671875
Current State,action,reward,Response time,Next State:  (8, 16.237094554670044) 2 22.0 1320.01714264 (15.950694610794756, 8)
loss 2628.444580078125
Current State,action,reward,Response time,Next State:  (8, 15.950694610794756) 3 21.0 1303.27855664 (15.828704162850809, 9)
loss 1528.570068359375
Current State,action,reward,Response time,Next State:  (9, 15.828704162850809) 3 20.0 1251.67208827 (15.550833128512703, 10)
loss 2234.01708984375
Action +2 not possible so Scaled up by 1
Current State,action,reward,Response time,Next State:  (10, 15.550833128512703) 4 19.0 1202.54023315 (15.446694946204717, 11)
loss 1368.7423095703125
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 0 21.0 1184.33851965 (15.750501603468638, 9)
loss 1300.631103515625
Current State,action,reward,Response time,Next State:  (9, 15.750501603468638) 4 19.0 1247.57857022 (15.817158911312735, 11)
loss 1676.537109375
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 4 19.0 1203.91462651 (15.829956988360925, 11)
loss 1457.73388671875
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 2 19.0 1204.59090422 (15.892373986997768, 11)
loss 2187.457763671875
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 4 19.0 1207.88915169 (15.954793861767499, 11)
loss 1010.89306640625
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 1 20.0 1211.18755114 (16.004586266677634, 10)
loss 969.1439819335938
Current State,action,reward,Response time,Next State:  (10, 16.004586266677634) 2 20.0 1226.60915635 (16.017694914042416, 10)
loss 1075.2210693359375
Current State,action,reward,Response time,Next State:  (10, 16.017694914042416) 3 19.0 1227.30449265 (15.947547279389703, 11)
loss 797.3086547851562
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 1260.1043701171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 989.9480590820312
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 2 19.0 1221.34827555 (16.229253414601111, 11)
loss 2670.071533203125
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 2 19.0 1225.69057988 (16.295120821876548, 11)
loss 653.885986328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 828.46875
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 2 19.0 1248.87152463 (16.836383524612351, 11)
loss 1960.505615234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 2565.684814453125
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 2 19.0 1258.27117243 (17.052961248403161, 11)
loss 1286.14013671875
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 2 19.0 1269.21706044 (17.215992726625572, 11)
loss 3346.893310546875
############ Running episode number: 29  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 1 21.0 1029.06659875 (11.973514343585284, 9)
loss 2911.1259765625
Current State,action,reward,Response time,Next State:  (9, 11.973514343585284) 0 23.0 1049.87192659 (11.786394321941378, 7)
loss 1244.2408447265625
Current State,action,reward,Response time,Next State:  (7, 11.786394321941378) 1 24.0 1110.31871442 (11.61852219546234, 6)
loss 2110.828125
Current State,action,reward,Response time,Next State:  (6, 11.61852219546234) 2 24.0 1165.83754105 (11.469111876584304, 6)
loss 2763.913330078125
Current State,action,reward,Response time,Next State:  (6, 11.469111876584304) 2 24.0 1155.95567613 (11.336751742492702, 6)
loss 645.906494140625
Current State,action,reward,Response time,Next State:  (6, 11.336751742492702) 3 23.0 1147.20149519 (11.25610796929319, 7)
loss 601.3908081054688
Current State,action,reward,Response time,Next State:  (7, 11.25610796929319) 0 25.0 1077.27300243 (11.027107764209074, 5)
loss 2526.052734375
Current State,action,reward,Response time,Next State:  (5, 11.027107764209074) 3 24.0 1270.75995258 (10.995673623987257, 6)
loss 1247.78271484375
Current State,action,reward,Response time,Next State:  (6, 10.995673623987257) 3 23.0 1124.64289336 (10.931193889570471, 7)
loss 1310.3985595703125
Current State,action,reward,Response time,Next State:  (7, 10.931193889570471) 0 25.0 1057.02541913 (10.816918347608043, 5)
loss 1555.732666015625
Current State,action,reward,Response time,Next State:  (5, 10.816918347608043) 1 26.0 1251.44728215 (10.819208572963639, 4)
loss 2092.220947265625
Current State,action,reward,Response time,Next State:  (4, 10.819208572963639) 0 -72.9949062919 1414.17946021 (10.768325938188134, 2)
loss 1736.2791748046875
Current State,action,reward,Response time,Next State:  (2, 10.768325938188134) 3 25.1032325301 3009.94906292 (10.772009508959538, 3)
loss 3414.26611328125
Current State,action,reward,Response time,Next State:  (3, 10.772009508959538) 4 25.0 2018.9676747 (10.644925616761762, 5)
loss 578.015380859375
Current State,action,reward,Response time,Next State:  (5, 10.644925616761762) 3 24.0 1235.64420805 (10.58735855349979, 6)
loss 2800.06787109375
Current State,action,reward,Response time,Next State:  (6, 10.58735855349979) 4 22.0 1097.63729963 (10.552868829802469, 8)
loss 1296.0478515625
Current State,action,reward,Response time,Next State:  (8, 10.552868829802469) 3 21.0 987.80373542 (10.553846649940214, 9)
loss 813.3181762695312
Current State,action,reward,Response time,Next State:  (9, 10.553846649940214) 0 23.0 975.559328891 (10.489125480251131, 7)
loss 1655.1263427734375
Current State,action,reward,Response time,Next State:  (7, 10.489125480251131) 0 25.0 1029.47716098 (10.448897752470936, 5)
loss 1384.84521484375
Current State,action,reward,Response time,Next State:  (5, 10.448897752470936) 0 27.0 1217.6327325 (10.433149880183072, 3)
loss 1585.166015625
Action -2 not possible so Scaled down by 1
Current State,action,reward,Response time,Next State:  (3, 10.433149880183072) 0 -62.8421061381 1952.32373471 (10.44185150623065, 2)
loss 567.91259765625
Current State,action,reward,Response time,Next State:  (2, 10.44185150623065) 3 27.0 2908.42106138 (10.370942817486826, 3)
loss 1379.1702880859375
Current State,action,reward,Response time,Next State:  (3, 10.370942817486826) 2 27.0 1940.08939517 (10.42733414151318, 3)
loss 1082.669677734375
Current State,action,reward,Response time,Next State:  (3, 10.42733414151318) 3 26.0 1951.17994623 (10.388469398680568, 4)
loss 2445.8017578125
Current State,action,reward,Response time,Next State:  (4, 10.388469398680568) 4 24.0 1365.24559907 (10.344006106602812, 6)
loss 1426.9658203125
Current State,action,reward,Response time,Next State:  (6, 10.344006106602812) 1 25.0 1081.54218635 (10.319026962956018, 5)
loss 1883.8062744140625
Current State,action,reward,Response time,Next State:  (5, 10.319026962956018) 3 24.0 1205.69991592 (10.30224719189987, 6)
loss 1483.682861328125
Current State,action,reward,Response time,Next State:  (6, 10.30224719189987) 1 25.0 1078.78028906 (10.278181486298042, 5)
loss 2522.702880859375
Current State,action,reward,Response time,Next State:  (5, 10.278181486298042) 2 25.0 1201.9469428 (10.268274366284802, 5)
loss 1024.952880859375
Current State,action,reward,Response time,Next State:  (5, 10.268274366284802) 4 23.0 1201.03665461 (10.335411397720526, 7)
loss 2201.115478515625
Current State,action,reward,Response time,Next State:  (7, 10.335411397720526) 4 21.0 1019.89820185 (10.305649118067803, 9)
loss 606.9725341796875
Current State,action,reward,Response time,Next State:  (9, 10.305649118067803) 0 23.0 962.567412952 (10.24826025489064, 7)
loss 2001.6251220703125
Current State,action,reward,Response time,Next State:  (7, 10.24826025489064) 3 22.0 1014.46722752 (10.276491935146446, 8)
loss 495.3424377441406
Current State,action,reward,Response time,Next State:  (8, 10.276491935146446) 1 23.0 971.650944469 (10.236991269871366, 7)
loss 1299.24853515625
Current State,action,reward,Response time,Next State:  (7, 10.236991269871366) 4 21.0 1013.76498121 (10.236272697871373, 9)
loss 1348.60400390625
Current State,action,reward,Response time,Next State:  (9, 10.236272697871373) 3 20.0 958.935899728 (10.369891240151098, 10)
loss 1515.583251953125
Action +2 not possible so Scaled up by 1
Current State,action,reward,Response time,Next State:  (10, 10.369891240151098) 4 19.0 927.721874973 (10.316955310454549, 11)
loss 1189.441650390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 2056.9423828125
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 2 19.0 914.152581784 (10.390165524255663, 11)
loss 1511.281982421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 820.9090576171875
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 4 19.0 919.032945938 (10.546025383098053, 11)
loss 2764.85400390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 1530.1656494140625
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 0 21.0 931.154858096 (10.624473674922116, 9)
loss 3596.387451171875
Current State,action,reward,Response time,Next State:  (9, 10.624473674922116) 3 20.0 979.256305105 (10.771376986314287, 10)
loss 2074.60791015625
Current State,action,reward,Response time,Next State:  (10, 10.771376986314287) 1 21.0 949.018321829 (10.924797168745895, 9)
loss 3575.37890625
Current State,action,reward,Response time,Next State:  (9, 10.924797168745895) 3 20.0 994.97675791 (11.039747673816453, 10)
loss 683.23583984375
Current State,action,reward,Response time,Next State:  (10, 11.039747673816453) 1 21.0 963.253801267 (11.271571944085663, 9)
loss 1085.3067626953125
Current State,action,reward,Response time,Next State:  (9, 11.271571944085663) 4 19.0 1013.12870607 (11.670334358779868, 11)
loss 1179.90087890625
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 0 21.0 984.787563682 (11.819721938468785, 9)
loss 1308.4246826171875
Current State,action,reward,Response time,Next State:  (9, 11.819721938468785) 4 19.0 1041.82165315 (12.19918626616789, 11)
loss 2490.53515625
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 1 20.0 1012.73322757 (12.501496275411796, 10)
loss 1984.2054443359375
Current State,action,reward,Response time,Next State:  (10, 12.501496275411796) 1 21.0 1040.79092857 (13.168618569876575, 9)
loss 1341.501708984375
Current State,action,reward,Response time,Next State:  (9, 13.168618569876575) 1 22.0 1112.429735 (13.649658108197247, 8)
loss 2699.985595703125
Current State,action,reward,Response time,Next State:  (8, 13.649658108197247) 4 20.0 1168.79494995 (14.283719188889453, 10)
loss 2050.601806640625
Current State,action,reward,Response time,Next State:  (10, 14.283719188889453) 1 21.0 1135.32732476 (14.677479537099185, 9)
loss 462.8455810546875
Current State,action,reward,Response time,Next State:  (9, 14.677479537099185) 2 21.0 1191.41116041 (15.353965082180355, 9)
loss 1607.0765380859375
Current State,action,reward,Response time,Next State:  (9, 15.353965082180355) 4 19.0 1226.82184023 (15.836943704090487, 11)
loss 2828.841796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 902.2546997070312
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 4 19.0 1238.24711194 (16.871606159345866, 11)
loss 2435.48486328125
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 0 21.0 1259.63387034 (17.534967586021782, 9)
loss 1787.74365234375
Current State,action,reward,Response time,Next State:  (9, 17.534967586021782) 4 19.0 1340.98655806 (17.669285735563751, 11)
loss 1679.574462890625
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 0 21.0 1301.78496219 (17.944480812078613, 9)
loss 2982.122314453125
Current State,action,reward,Response time,Next State:  (9, 17.944480812078613) 3 20.0 1362.4225545 (18.385807405229915, 10)
loss 2395.51611328125
Current State,action,reward,Response time,Next State:  (10, 18.385807405229915) 0 22.0 1352.9188695 (18.671267839956315, 8)
loss 1167.4638671875
Current State,action,reward,Response time,Next State:  (8, 18.671267839956315) 3 21.0 1462.2819013 (19.02839494033929, 9)
loss 3217.479736328125
Current State,action,reward,Response time,Next State:  (9, 19.02839494033929) 4 19.0 1419.16011 (19.286321916040979, 11)
loss 2851.774169921875
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 4 19.0 1387.23260634 (19.340464848017284, 11)
loss 1027.0203857421875
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 0 21.0 1390.09363446 (19.213467265587269, 9)
loss 1453.8828125
Current State,action,reward,Response time,Next State:  (9, 19.213467265587269) 2 21.0 1428.84773289 (19.140765783401285, 9)
loss 3075.759033203125
Current State,action,reward,Response time,Next State:  (9, 19.140765783401285) 3 20.0 1425.04216908 (19.385636054792762, 10)
loss 2077.625
Current State,action,reward,Response time,Next State:  (10, 19.385636054792762) 3 19.0 1405.95387237 (19.223969507401588, 11)
loss 649.1133422851562
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 4 19.0 1383.93777195 (19.25591252280865, 11)
loss 698.9403076171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 2697.058349609375
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 0 21.0 1376.52055872 (18.668181536495972, 9)
loss 1746.466064453125
Current State,action,reward,Response time,Next State:  (9, 18.668181536495972) 3 20.0 1400.30471596 (18.375894992990247, 10)
loss 827.5234985351562
Current State,action,reward,Response time,Next State:  (10, 18.375894992990247) 0 22.0 1352.39307459 (17.82724819986867, 8)
loss 3992.01806640625
Current State,action,reward,Response time,Next State:  (8, 17.82724819986867) 2 22.0 1412.95334646 (17.229782241685768, 8)
loss 4134.13330078125
Current State,action,reward,Response time,Next State:  (8, 17.229782241685768) 3 21.0 1378.03457101 (16.84211602880065, 9)
loss 2602.95068359375
Current State,action,reward,Response time,Next State:  (9, 16.84211602880065) 4 19.0 1304.71919827 (16.237094554670044, 11)
loss 1912.2806396484375
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 1 20.0 1226.10492247 (15.950694610794756, 10)
loss 955.1888427734375
Current State,action,reward,Response time,Next State:  (10, 15.950694610794756) 2 20.0 1223.7505224 (15.828704162850809, 10)
loss 642.657470703125
Current State,action,reward,Response time,Next State:  (10, 15.828704162850809) 1 21.0 1217.27964986 (15.550833128512703, 9)
loss 1418.2613525390625
Current State,action,reward,Response time,Next State:  (9, 15.550833128512703) 4 19.0 1237.12691092 (15.446694946204717, 11)
loss 688.919677734375
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 1 20.0 1184.33851965 (15.750501603468638, 10)
loss 2519.32763671875
Current State,action,reward,Response time,Next State:  (10, 15.750501603468638) 2 20.0 1213.1314661 (15.817158911312735, 10)
loss 1044.8529052734375
Current State,action,reward,Response time,Next State:  (10, 15.817158911312735) 2 20.0 1216.66724247 (15.829956988360925, 10)
loss 1691.63916015625
Current State,action,reward,Response time,Next State:  (10, 15.829956988360925) 3 19.0 1217.34610485 (15.892373986997768, 11)
loss 1987.2083740234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 1498.406005859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 1094.607421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 1153.2939453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 759.754638671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 1351.761962890625
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 4 19.0 1219.63501821 (16.147078378791146, 11)
loss 836.9828491210938
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 3501.5
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 1965.734130859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 2054.44921875
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 4 19.0 1248.87152463 (16.836383524612351, 11)
loss 623.29296875
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 2 19.0 1257.77263112 (16.845818065953559, 11)
loss 699.0853271484375
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 1 20.0 1258.27117243 (17.052961248403161, 10)
loss 623.1435546875
Current State,action,reward,Response time,Next State:  (10, 17.052961248403161) 2 20.0 1282.21925533 (17.215992726625572, 10)
loss 902.0791625976562
############ Running episode number: 30  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 1944.5159912109375
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 4 19.0 1000.80824137 (11.786394321941378, 11)
loss 4470.97607421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 883.712158203125
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 4 19.0 982.049698353 (11.469111876584304, 11)
loss 2820.3759765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 2468.919189453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 539.3209228515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 2236.3369140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 3160.384033203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 3748.125732421875
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 0 21.0 945.729803224 (10.816918347608043, 9)
loss 1450.416015625
Current State,action,reward,Response time,Next State:  (9, 10.816918347608043) 2 21.0 989.329834005 (10.819208572963639, 9)
loss 1400.290771484375
Current State,action,reward,Response time,Next State:  (9, 10.819208572963639) 3 20.0 989.449716 (10.768325938188134, 10)
loss 599.9810180664062
Current State,action,reward,Response time,Next State:  (10, 10.768325938188134) 1 21.0 948.856481751 (10.772009508959538, 9)
loss 3548.23974609375
Current State,action,reward,Response time,Next State:  (9, 10.772009508959538) 0 23.0 986.979077927 (10.644925616761762, 7)
loss 1805.80615234375
Current State,action,reward,Response time,Next State:  (7, 10.644925616761762) 1 24.0 1039.18611617 (10.58735855349979, 6)
loss 1357.502197265625
Current State,action,reward,Response time,Next State:  (6, 10.58735855349979) 4 22.0 1097.63729963 (10.552868829802469, 8)
loss 1013.1004028320312
Current State,action,reward,Response time,Next State:  (8, 10.552868829802469) 4 20.0 987.80373542 (10.553846649940214, 10)
loss 2239.720458984375
Current State,action,reward,Response time,Next State:  (10, 10.553846649940214) 2 20.0 937.479622653 (10.489125480251131, 10)
loss 510.5622253417969
Current State,action,reward,Response time,Next State:  (10, 10.489125480251131) 2 20.0 934.046546974 (10.448897752470936, 10)
loss 521.0337524414062
Current State,action,reward,Response time,Next State:  (10, 10.448897752470936) 3 19.0 931.912703681 (10.433149880183072, 11)
loss 931.2048950195312
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 1411.2335205078125
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 2 19.0 919.871906942 (10.370942817486826, 11)
loss 4357.13330078125
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 4 19.0 916.124940439 (10.42733414151318, 11)
loss 1554.2349853515625
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 0 21.0 919.104778396 (10.388469398680568, 9)
loss 1688.3046875
Current State,action,reward,Response time,Next State:  (9, 10.388469398680568) 3 20.0 966.902645924 (10.344006106602812, 10)
loss 1846.101318359375
Current State,action,reward,Response time,Next State:  (10, 10.344006106602812) 0 22.0 926.348821567 (10.319026962956018, 8)
loss 1505.2921142578125
Current State,action,reward,Response time,Next State:  (8, 10.319026962956018) 2 22.0 974.136895449 (10.30224719189987, 8)
loss 2829.19189453125
Current State,action,reward,Response time,Next State:  (8, 10.30224719189987) 3 21.0 973.15620517 (10.278181486298042, 9)
loss 1086.158203125
Current State,action,reward,Response time,Next State:  (9, 10.278181486298042) 1 22.0 961.129617982 (10.268274366284802, 8)
loss 2305.330810546875
Current State,action,reward,Response time,Next State:  (8, 10.268274366284802) 0 24.0 971.170670341 (10.335411397720526, 6)
loss 2012.2364501953125
Current State,action,reward,Response time,Next State:  (6, 10.335411397720526) 0 26.0 1080.97373999 (10.305649118067803, 4)
loss 1438.789306640625
Current State,action,reward,Response time,Next State:  (4, 10.305649118067803) 3 25.0 1355.83685161 (10.24826025489064, 5)
loss 570.508056640625
Current State,action,reward,Response time,Next State:  (5, 10.24826025489064) 1 26.0 1199.19771361 (10.276491935146446, 4)
loss 1688.6502685546875
Current State,action,reward,Response time,Next State:  (4, 10.276491935146446) 3 25.0 1352.52446763 (10.236991269871366, 5)
loss 654.4539794921875
Current State,action,reward,Response time,Next State:  (5, 10.236991269871366) 3 24.0 1198.16229423 (10.236272697871373, 6)
loss 2365.7333984375
Current State,action,reward,Response time,Next State:  (6, 10.236272697871373) 4 22.0 1074.41679501 (10.369891240151098, 8)
loss 910.3993530273438
Current State,action,reward,Response time,Next State:  (8, 10.369891240151098) 3 21.0 977.109647703 (10.316955310454549, 9)
loss 593.423095703125
Current State,action,reward,Response time,Next State:  (9, 10.316955310454549) 4 19.0 963.159236328 (10.333617326102203, 11)
loss 2093.12646484375
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 1 20.0 914.152581784 (10.390165524255663, 10)
loss 772.4270629882812
Current State,action,reward,Response time,Next State:  (10, 10.390165524255663) 3 19.0 928.797305964 (10.425974763084863, 11)
loss 1514.841552734375
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 1 20.0 919.032945938 (10.546025383098053, 10)
loss 2060.428955078125
Action +2 not possible so Scaled up by 1
Current State,action,reward,Response time,Next State:  (10, 10.546025383098053) 4 19.0 937.064750655 (10.655373370049301, 11)
loss 2821.92333984375
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 0 21.0 931.154858096 (10.624473674922116, 9)
loss 2399.855224609375
Current State,action,reward,Response time,Next State:  (9, 10.624473674922116) 1 22.0 979.256305105 (10.771376986314287, 8)
loss 435.9341735839844
Current State,action,reward,Response time,Next State:  (8, 10.771376986314287) 3 21.0 1000.57439983 (10.924797168745895, 9)
loss 2609.857666015625
Current State,action,reward,Response time,Next State:  (9, 10.924797168745895) 2 21.0 994.97675791 (11.039747673816453, 9)
loss 1027.0516357421875
Current State,action,reward,Response time,Next State:  (9, 11.039747673816453) 3 20.0 1000.99384957 (11.271571944085663, 10)
loss 1108.0782470703125
Action +2 not possible so Scaled up by 1
Current State,action,reward,Response time,Next State:  (10, 11.271571944085663) 4 19.0 975.550709187 (11.670334358779868, 11)
loss 2519.27978515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 1910.771484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 3376.409423828125
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 4 19.0 1012.73322757 (12.501496275411796, 11)
loss 1074.46630859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 2513.729248046875
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 2 19.0 1063.96010023 (13.649658108197247, 11)
loss 1177.5526123046875
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 0 21.0 1089.37925646 (14.283719188889453, 9)
loss 2013.31396484375
Current State,action,reward,Response time,Next State:  (9, 14.283719188889453) 3 20.0 1170.79974938 (14.677479537099185, 10)
loss 1152.5509033203125
Action +2 not possible so Scaled up by 1
Current State,action,reward,Response time,Next State:  (10, 14.677479537099185) 4 19.0 1156.21398489 (15.353965082180355, 11)
loss 3446.2236328125
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 1 20.0 1179.43847566 (15.836943704090487, 10)
loss 1923.2376708984375
Current State,action,reward,Response time,Next State:  (10, 15.836943704090487) 2 20.0 1217.71670884 (16.466876895473597, 10)
loss 1008.658935546875
Current State,action,reward,Response time,Next State:  (10, 16.466876895473597) 3 19.0 1251.130943 (16.871606159345866, 11)
loss 1882.1181640625
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 2 19.0 1259.63387034 (17.534967586021782, 11)
loss 2319.64794921875
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 4 19.0 1294.6873044 (17.669285735563751, 11)
loss 1296.4351806640625
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 4 19.0 1301.78496219 (17.944480812078613, 11)
loss 3464.9560546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 743.5238647460938
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 1 20.0 1339.64749699 (18.671267839956315, 10)
loss 598.1026000976562
Current State,action,reward,Response time,Next State:  (10, 18.671267839956315) 3 19.0 1368.06085906 (19.02839494033929, 11)
loss 1585.5865478515625
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 0 21.0 1373.60319427 (19.286321916040979, 9)
loss 5133.8017578125
Current State,action,reward,Response time,Next State:  (9, 19.286321916040979) 4 19.0 1432.66131431 (19.340464848017284, 11)
loss 1212.529541015625
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 0 21.0 1390.09363446 (19.213467265587269, 9)
loss 2632.72509765625
Current State,action,reward,Response time,Next State:  (9, 19.213467265587269) 3 20.0 1428.84773289 (19.140765783401285, 10)
loss 1709.0311279296875
Current State,action,reward,Response time,Next State:  (10, 19.140765783401285) 3 19.0 1392.96495117 (19.385636054792762, 11)
loss 2047.0081787109375
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 4 19.0 1392.48057747 (19.223969507401588, 11)
loss 1219.22998046875
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 2 19.0 1383.93777195 (19.25591252280865, 11)
loss 610.7195434570312
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 1969.8697509765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 2185.177490234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 485.2964172363281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 3232.55517578125
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 2 19.0 1310.13203606 (17.229782241685768, 11)
loss 2311.7294921875
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 1 20.0 1278.56065924 (16.84211602880065, 10)
loss 603.7877197265625
Current State,action,reward,Response time,Next State:  (10, 16.84211602880065) 3 19.0 1271.03516211 (16.237094554670044, 11)
loss 2286.7490234375
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 4 19.0 1226.10492247 (15.950694610794756, 11)
loss 1873.71337890625
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 2 19.0 1210.97093797 (15.828704162850809, 11)
loss 2132.3408203125
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 4 19.0 1204.52470225 (15.550833128512703, 11)
loss 1675.0650634765625
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 0 21.0 1189.84140354 (15.446694946204717, 9)
loss 2765.297607421875
Current State,action,reward,Response time,Next State:  (9, 15.446694946204717) 0 23.0 1231.67579099 (15.750501603468638, 7)
loss 668.8662719726562
Current State,action,reward,Response time,Next State:  (7, 15.750501603468638) 4 21.0 1357.34892154 (15.817158911312735, 9)
loss 636.5196533203125
Current State,action,reward,Response time,Next State:  (9, 15.817158911312735) 1 22.0 1251.06775133 (15.829956988360925, 8)
loss 2411.041259765625
Current State,action,reward,Response time,Next State:  (8, 15.829956988360925) 3 21.0 1296.22207104 (15.892373986997768, 9)
loss 1272.1273193359375
Current State,action,reward,Response time,Next State:  (9, 15.892373986997768) 3 20.0 1255.00488935 (15.954793861767499, 10)
loss 771.1026000976562
Current State,action,reward,Response time,Next State:  (10, 15.954793861767499) 2 20.0 1223.96796344 (16.004586266677634, 10)
loss 694.3603515625
Current State,action,reward,Response time,Next State:  (10, 16.004586266677634) 2 20.0 1226.60915635 (16.017694914042416, 10)
loss 530.745361328125
Current State,action,reward,Response time,Next State:  (10, 16.017694914042416) 3 19.0 1227.30449265 (15.947547279389703, 11)
loss 1769.9886474609375
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 0 21.0 1210.80462626 (16.11465619633363, 9)
loss 1676.6103515625
Current State,action,reward,Response time,Next State:  (9, 16.11465619633363) 1 22.0 1266.64026605 (16.147078378791146, 8)
loss 1133.5433349609375
Current State,action,reward,Response time,Next State:  (8, 16.147078378791146) 4 20.0 1314.7561657 (16.229253414601111, 10)
loss 1891.611572265625
Current State,action,reward,Response time,Next State:  (10, 16.229253414601111) 3 19.0 1238.52642122 (16.295120821876548, 11)
loss 1526.2208251953125
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 1 20.0 1229.17115431 (16.667936385136993, 10)
loss 1441.1610107421875
Current State,action,reward,Response time,Next State:  (10, 16.667936385136993) 0 22.0 1261.79596106 (16.836383524612351, 8)
loss 1292.8331298828125
Current State,action,reward,Response time,Next State:  (8, 16.836383524612351) 3 21.0 1355.04246364 (16.845818065953559, 9)
loss 1317.7730712890625
Current State,action,reward,Response time,Next State:  (9, 16.845818065953559) 3 20.0 1304.91298164 (17.052961248403161, 10)
loss 4335.65625
Action +2 not possible so Scaled up by 1
Current State,action,reward,Response time,Next State:  (10, 17.052961248403161) 4 19.0 1282.21925533 (17.215992726625572, 11)
loss 1207.097900390625
############ Running episode number: 31  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 0 22.0 1029.06659875 (11.973514343585284, 8)
loss 1816.43359375
Current State,action,reward,Response time,Next State:  (8, 11.973514343585284) 0 24.0 1070.83307124 (11.786394321941378, 6)
loss 1128.5601806640625
Current State,action,reward,Response time,Next State:  (6, 11.786394321941378) 2 24.0 1176.94045342 (11.61852219546234, 6)
loss 2260.614013671875
Current State,action,reward,Response time,Next State:  (6, 11.61852219546234) 4 22.0 1165.83754105 (11.469111876584304, 8)
loss 1704.6099853515625
Current State,action,reward,Response time,Next State:  (8, 11.469111876584304) 3 21.0 1041.35337246 (11.336751742492702, 9)
loss 560.6580810546875
Current State,action,reward,Response time,Next State:  (9, 11.336751742492702) 4 19.0 1016.54054685 (11.25610796929319, 11)
loss 3976.14501953125
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 4 19.0 962.898956888 (11.027107764209074, 11)
loss 835.4951782226562
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 1 20.0 950.798097136 (10.995673623987257, 10)
loss 857.9944458007812
Current State,action,reward,Response time,Next State:  (10, 10.995673623987257) 2 20.0 960.915933313 (10.931193889570471, 10)
loss 1829.2294921875
Current State,action,reward,Response time,Next State:  (10, 10.931193889570471) 3 19.0 957.495664348 (10.816918347608043, 11)
loss 505.3678283691406
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 4 19.0 939.691239608 (10.819208572963639, 11)
loss 1774.5831298828125
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 2 19.0 939.812260006 (10.768325938188134, 11)
loss 495.9000244140625
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 2 19.0 937.12351295 (10.772009508959538, 11)
loss 913.3591918945312
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 2 19.0 937.318160694 (10.644925616761762, 11)
loss 1194.1417236328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 1123.8192138671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 829.773681640625
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 2 19.0 925.738299342 (10.553846649940214, 11)
loss 1309.964111328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 1541.6571044921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 1411.8355712890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 4138.953125
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 4 19.0 919.412094444 (10.44185150623065, 11)
loss 1165.6016845703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 4428.6015625
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 2 19.0 916.124940439 (10.42733414151318, 11)
loss 1505.0880126953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 5410.2822265625
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 0 21.0 917.051082408 (10.344006106602812, 9)
loss 2266.938720703125
Current State,action,reward,Response time,Next State:  (9, 10.344006106602812) 2 21.0 964.575212011 (10.319026962956018, 9)
loss 2357.424560546875
Current State,action,reward,Response time,Next State:  (9, 10.319026962956018) 1 22.0 963.267677113 (10.30224719189987, 8)
loss 3345.743408203125
Current State,action,reward,Response time,Next State:  (8, 10.30224719189987) 1 23.0 973.15620517 (10.278181486298042, 7)
loss 819.3046875
Current State,action,reward,Response time,Next State:  (7, 10.278181486298042) 2 23.0 1016.33182085 (10.268274366284802, 7)
loss 1997.804931640625
Current State,action,reward,Response time,Next State:  (7, 10.268274366284802) 0 25.0 1015.71444152 (10.335411397720526, 5)
loss 1057.47119140625
Current State,action,reward,Response time,Next State:  (5, 10.335411397720526) 0 27.0 1207.20535417 (10.305649118067803, 3)
loss 2620.7216796875
Current State,action,reward,Response time,Next State:  (3, 10.305649118067803) 2 27.0 1927.248003 (10.24826025489064, 3)
loss 1353.0609130859375
Current State,action,reward,Response time,Next State:  (3, 10.24826025489064) 3 26.0 1915.96126469 (10.276491935146446, 4)
loss 685.6845092773438
Current State,action,reward,Response time,Next State:  (4, 10.276491935146446) 3 25.0 1352.52446763 (10.236991269871366, 5)
loss 1202.4031982421875
Current State,action,reward,Response time,Next State:  (5, 10.236991269871366) 3 24.0 1198.16229423 (10.236272697871373, 6)
loss 2439.371826171875
Current State,action,reward,Response time,Next State:  (6, 10.236272697871373) 2 24.0 1074.41679501 (10.369891240151098, 6)
loss 1260.43603515625
Current State,action,reward,Response time,Next State:  (6, 10.369891240151098) 4 22.0 1083.25420594 (10.316955310454549, 8)
loss 779.898681640625
Current State,action,reward,Response time,Next State:  (8, 10.316955310454549) 4 20.0 974.015818144 (10.333617326102203, 10)
loss 1652.4149169921875
Current State,action,reward,Response time,Next State:  (10, 10.333617326102203) 0 22.0 925.797758139 (10.390165524255663, 8)
loss 2311.8046875
Current State,action,reward,Response time,Next State:  (8, 10.390165524255663) 3 21.0 978.294574081 (10.425974763084863, 9)
loss 2007.9951171875
Current State,action,reward,Response time,Next State:  (9, 10.425974763084863) 4 19.0 968.865866662 (10.546025383098053, 11)
loss 448.6386413574219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 1505.1627197265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 1011.7046508789062
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 0 21.0 929.522052234 (10.771376986314287, 9)
loss 1398.5369873046875
Current State,action,reward,Response time,Next State:  (9, 10.771376986314287) 1 22.0 986.945968488 (10.924797168745895, 8)
loss 588.4241943359375
Current State,action,reward,Response time,Next State:  (8, 10.924797168745895) 3 21.0 1009.54101094 (11.039747673816453, 9)
loss 721.1727905273438
Current State,action,reward,Response time,Next State:  (9, 11.039747673816453) 3 20.0 1000.99384957 (11.271571944085663, 10)
loss 1137.061279296875
Current State,action,reward,Response time,Next State:  (10, 11.271571944085663) 3 19.0 975.550709187 (11.670334358779868, 11)
loss 809.8558349609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 2344.821533203125
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 4 19.0 992.681522335 (12.19918626616789, 11)
loss 2435.57177734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 1586.2215576171875
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 2 19.0 1028.70793389 (13.168618569876575, 11)
loss 4034.65869140625
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 4 19.0 1063.96010023 (13.649658108197247, 11)
loss 1829.694580078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 490.2834777832031
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 2 19.0 1122.88439768 (14.677479537099185, 11)
loss 2050.972900390625
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 4 19.0 1143.69153516 (15.353965082180355, 11)
loss 5007.197265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 1857.7620849609375
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 2 19.0 1204.9600972 (16.466876895473597, 11)
loss 937.9231567382812
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 4 19.0 1238.24711194 (16.871606159345866, 11)
loss 2660.19775390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 2245.26513671875
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 0 21.0 1294.6873044 (17.669285735563751, 9)
loss 1823.327880859375
Current State,action,reward,Response time,Next State:  (9, 17.669285735563751) 3 20.0 1348.01745033 (17.944480812078613, 10)
loss 642.4456787109375
Current State,action,reward,Response time,Next State:  (10, 17.944480812078613) 2 20.0 1329.50910109 (18.385807405229915, 10)
loss 2705.09716796875
Current State,action,reward,Response time,Next State:  (10, 18.385807405229915) 0 22.0 1352.9188695 (18.671267839956315, 8)
loss 4301.880859375
Current State,action,reward,Response time,Next State:  (8, 18.671267839956315) 3 21.0 1462.2819013 (19.02839494033929, 9)
loss 1175.8365478515625
Current State,action,reward,Response time,Next State:  (9, 19.02839494033929) 2 21.0 1419.16011 (19.286321916040979, 9)
loss 2066.47607421875
Current State,action,reward,Response time,Next State:  (9, 19.286321916040979) 3 20.0 1432.66131431 (19.340464848017284, 10)
loss 753.28564453125
Current State,action,reward,Response time,Next State:  (10, 19.340464848017284) 0 22.0 1403.55780672 (19.213467265587269, 8)
loss 1596.731689453125
Current State,action,reward,Response time,Next State:  (8, 19.213467265587269) 2 22.0 1493.97063558 (19.140765783401285, 8)
loss 1347.181396484375
Current State,action,reward,Response time,Next State:  (8, 19.140765783401285) 3 21.0 1489.72161235 (19.385636054792762, 9)
loss 1887.6553955078125
Current State,action,reward,Response time,Next State:  (9, 19.385636054792762) 3 20.0 1437.85991935 (19.223969507401588, 10)
loss 2742.294189453125
Current State,action,reward,Response time,Next State:  (10, 19.223969507401588) 3 19.0 1397.37841716 (19.25591252280865, 11)
loss 521.1777954101562
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 1125.1661376953125
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 2 19.0 1376.52055872 (18.668181536495972, 11)
loss 1971.71728515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 1797.7283935546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 572.644287109375
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 1 20.0 1310.13203606 (17.229782241685768, 10)
loss 3030.61669921875
Current State,action,reward,Response time,Next State:  (10, 17.229782241685768) 2 20.0 1291.59856437 (16.84211602880065, 10)
loss 891.3178100585938
Current State,action,reward,Response time,Next State:  (10, 16.84211602880065) 2 20.0 1271.03516211 (16.237094554670044, 10)
loss 1175.0860595703125
Current State,action,reward,Response time,Next State:  (10, 16.237094554670044) 1 21.0 1238.94234737 (15.950694610794756, 9)
loss 822.5083618164062
Current State,action,reward,Response time,Next State:  (9, 15.950694610794756) 3 20.0 1258.0576862 (15.828704162850809, 10)
loss 825.1563110351562
Current State,action,reward,Response time,Next State:  (10, 15.828704162850809) 0 22.0 1217.27964986 (15.550833128512703, 8)
loss 579.5001831054688
Current State,action,reward,Response time,Next State:  (8, 15.550833128512703) 3 21.0 1279.90873428 (15.446694946204717, 9)
loss 887.2023315429688
Current State,action,reward,Response time,Next State:  (9, 15.446694946204717) 0 23.0 1231.67579099 (15.750501603468638, 7)
loss 1581.3917236328125
Current State,action,reward,Response time,Next State:  (7, 15.750501603468638) 3 22.0 1357.34892154 (15.817158911312735, 8)
loss 1263.9317626953125
Current State,action,reward,Response time,Next State:  (8, 15.817158911312735) 2 22.0 1295.47409005 (15.829956988360925, 8)
loss 1227.5633544921875
Current State,action,reward,Response time,Next State:  (8, 15.829956988360925) 4 20.0 1296.22207104 (15.892373986997768, 10)
loss 2336.180419921875
Current State,action,reward,Response time,Next State:  (10, 15.892373986997768) 3 19.0 1220.65695786 (15.954793861767499, 11)
loss 995.7351684570312
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 2636.23681640625
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 4 19.0 1213.81868812 (16.017694914042416, 11)
loss 2932.25341796875
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 4 19.0 1214.51137704 (15.947547279389703, 11)
loss 2046.6385498046875
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 1 20.0 1210.80462626 (16.11465619633363, 10)
loss 1768.9498291015625
Current State,action,reward,Response time,Next State:  (10, 16.11465619633363) 3 19.0 1232.44771583 (16.147078378791146, 11)
loss 1368.7802734375
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 1 20.0 1221.34827555 (16.229253414601111, 10)
loss 538.7630004882812
Current State,action,reward,Response time,Next State:  (10, 16.229253414601111) 3 19.0 1238.52642122 (16.295120821876548, 11)
loss 2003.155029296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 1496.256103515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 3183.55029296875
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 2 19.0 1257.77263112 (16.845818065953559, 11)
loss 762.1956176757812
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 4 19.0 1258.27117243 (17.052961248403161, 11)
loss 1860.6993408203125
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 4 19.0 1269.21706044 (17.215992726625572, 11)
loss 1023.9213256835938
############ Running episode number: 32  ##############
Action +2 not possible so Scaled up by 1
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 4 19.0 1029.06659875 (11.973514343585284, 11)
loss 1464.864501953125
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 1 20.0 1000.80824137 (11.786394321941378, 10)
loss 483.1214294433594
Action +2 not possible so Scaled up by 1
Current State,action,reward,Response time,Next State:  (10, 11.786394321941378) 4 19.0 1002.85899476 (11.61852219546234, 11)
loss 815.2811889648438
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 4 19.0 982.049698353 (11.469111876584304, 11)
loss 1905.59228515625
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 0 21.0 974.154538113 (11.336751742492702, 9)
loss 2094.376708984375
Current State,action,reward,Response time,Next State:  (9, 11.336751742492702) 3 20.0 1016.54054685 (11.25610796929319, 10)
loss 648.4825439453125
Action +2 not possible so Scaled up by 1
Current State,action,reward,Response time,Next State:  (10, 11.25610796929319) 4 19.0 974.730436685 (11.027107764209074, 11)
loss 3091.5302734375
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 1 20.0 950.798097136 (10.995673623987257, 10)
loss 602.4072265625
Current State,action,reward,Response time,Next State:  (10, 10.995673623987257) 1 21.0 960.915933313 (10.931193889570471, 9)
loss 1179.367431640625
Current State,action,reward,Response time,Next State:  (9, 10.931193889570471) 3 20.0 995.311594677 (10.816918347608043, 10)
loss 1629.7220458984375
Current State,action,reward,Response time,Next State:  (10, 10.816918347608043) 2 20.0 951.434021987 (10.819208572963639, 10)
loss 1270.1737060546875
Current State,action,reward,Response time,Next State:  (10, 10.819208572963639) 3 19.0 951.555504911 (10.768325938188134, 11)
loss 1816.89990234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 2283.259521484375
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 0 21.0 937.318160694 (10.644925616761762, 9)
loss 2074.833740234375
Current State,action,reward,Response time,Next State:  (9, 10.644925616761762) 3 20.0 980.32686333 (10.58735855349979, 10)
loss 2670.496826171875
Current State,action,reward,Response time,Next State:  (10, 10.58735855349979) 2 20.0 939.257231149 (10.552868829802469, 10)
loss 2798.35546875
Current State,action,reward,Response time,Next State:  (10, 10.552868829802469) 0 22.0 937.427755072 (10.553846649940214, 8)
loss 1695.6044921875
Current State,action,reward,Response time,Next State:  (8, 10.553846649940214) 1 23.0 987.860883917 (10.489125480251131, 7)
loss 2368.957275390625
Current State,action,reward,Response time,Next State:  (7, 10.489125480251131) 4 21.0 1029.47716098 (10.448897752470936, 9)
loss 662.6116333007812
Current State,action,reward,Response time,Next State:  (9, 10.448897752470936) 2 21.0 970.065772031 (10.433149880183072, 9)
loss 826.3187255859375
Current State,action,reward,Response time,Next State:  (9, 10.433149880183072) 1 22.0 969.241448633 (10.44185150623065, 8)
loss 1083.5771484375
Current State,action,reward,Response time,Next State:  (8, 10.44185150623065) 3 21.0 981.315350702 (10.370942817486826, 9)
loss 1038.35205078125
Current State,action,reward,Response time,Next State:  (9, 10.370942817486826) 3 20.0 965.985215893 (10.42733414151318, 10)
loss 3026.335693359375
Current State,action,reward,Response time,Next State:  (10, 10.42733414151318) 3 19.0 930.768881517 (10.388469398680568, 11)
loss 2986.989501953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 589.2325439453125
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 4 19.0 914.701547126 (10.319026962956018, 11)
loss 1771.4468994140625
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 2 19.0 913.381595845 (10.30224719189987, 11)
loss 1158.6737060546875
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 0 21.0 912.494916918 (10.278181486298042, 9)
loss 1102.211669921875
Current State,action,reward,Response time,Next State:  (9, 10.278181486298042) 2 21.0 961.129617982 (10.268274366284802, 9)
loss 738.4528198242188
Current State,action,reward,Response time,Next State:  (9, 10.268274366284802) 2 21.0 960.611029141 (10.335411397720526, 9)
loss 825.1046142578125
Current State,action,reward,Response time,Next State:  (9, 10.335411397720526) 4 19.0 964.125321415 (10.305649118067803, 11)
loss 2335.465576171875
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 4 19.0 912.67468196 (10.24826025489064, 11)
loss 2393.091064453125
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 1 20.0 909.642131904 (10.276491935146446, 10)
loss 1274.286376953125
Current State,action,reward,Response time,Next State:  (10, 10.276491935146446) 3 19.0 922.767593645 (10.236991269871366, 11)
loss 1256.7681884765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 2943.37841796875
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 2 19.0 909.008683798 (10.369891240151098, 11)
loss 1595.687744140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 495.25006103515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 2657.6435546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 4660.626953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 1464.7115478515625
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 0 21.0 919.032945938 (10.546025383098053, 9)
loss 3364.296142578125
Current State,action,reward,Response time,Next State:  (9, 10.546025383098053) 1 22.0 975.14992417 (10.655373370049301, 8)
loss 1323.20166015625
Current State,action,reward,Response time,Next State:  (8, 10.655373370049301) 4 20.0 993.794592261 (10.624473674922116, 10)
loss 2034.350341796875
Current State,action,reward,Response time,Next State:  (10, 10.624473674922116) 3 19.0 941.225969064 (10.771376986314287, 11)
loss 2175.76611328125
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 4 19.0 937.284736847 (10.924797168745895, 11)
loss 3418.182861328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 1828.0906982421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 1776.8834228515625
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 4 19.0 963.716106332 (11.670334358779868, 11)
loss 2125.404296875
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 0 21.0 984.787563682 (11.819721938468785, 9)
loss 2494.802734375
Current State,action,reward,Response time,Next State:  (9, 11.819721938468785) 2 21.0 1041.82165315 (12.19918626616789, 9)
loss 2001.413818359375
Current State,action,reward,Response time,Next State:  (9, 12.19918626616789) 3 20.0 1061.68473805 (12.501496275411796, 10)
loss 1358.6824951171875
Current State,action,reward,Response time,Next State:  (10, 12.501496275411796) 1 21.0 1040.79092857 (13.168618569876575, 9)
loss 2489.854736328125
Current State,action,reward,Response time,Next State:  (9, 13.168618569876575) 3 20.0 1112.429735 (13.649658108197247, 10)
loss 967.7127685546875
Current State,action,reward,Response time,Next State:  (10, 13.649658108197247) 2 20.0 1101.69413046 (14.283719188889453, 10)
loss 2386.3466796875
Current State,action,reward,Response time,Next State:  (10, 14.283719188889453) 3 19.0 1135.32732476 (14.677479537099185, 11)
loss 1607.7236328125
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 4 19.0 1143.69153516 (15.353965082180355, 11)
loss 3317.068115234375
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 4 19.0 1179.43847566 (15.836943704090487, 11)
loss 691.4595336914062
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 1825.899658203125
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 0 21.0 1238.24711194 (16.871606159345866, 9)
loss 772.0870361328125
Current State,action,reward,Response time,Next State:  (9, 16.871606159345866) 4 19.0 1306.26286107 (17.534967586021782, 11)
loss 2164.62060546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 1932.46240234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 2039.326171875
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 2 19.0 1316.32685758 (18.385807405229915, 11)
loss 551.6105346679688
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 3068.375244140625
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 1 20.0 1354.73183582 (19.02839494033929, 10)
loss 781.1509399414062
Current State,action,reward,Response time,Next State:  (10, 19.02839494033929) 2 20.0 1387.00434183 (19.286321916040979, 10)
loss 1402.6544189453125
Current State,action,reward,Response time,Next State:  (10, 19.286321916040979) 0 22.0 1400.68584406 (19.340464848017284, 8)
loss 1659.924560546875
Current State,action,reward,Response time,Next State:  (8, 19.340464848017284) 3 21.0 1501.39298325 (19.213467265587269, 9)
loss 642.9029541015625
Current State,action,reward,Response time,Next State:  (9, 19.213467265587269) 0 23.0 1428.84773289 (19.140765783401285, 7)
loss 1320.1097412109375
Current State,action,reward,Response time,Next State:  (7, 19.140765783401285) 2 23.0 1568.61910246 (19.385636054792762, 7)
loss 3761.47265625
Current State,action,reward,Response time,Next State:  (7, 19.385636054792762) 3 22.0 1583.87861729 (19.223969507401588, 8)
loss 640.2849731445312
Current State,action,reward,Response time,Next State:  (8, 19.223969507401588) 3 21.0 1494.58443695 (19.25591252280865, 9)
loss 1324.47412109375
Current State,action,reward,Response time,Next State:  (9, 19.25591252280865) 3 20.0 1431.06953264 (19.08360399753829, 10)
loss 1791.4366455078125
Current State,action,reward,Response time,Next State:  (10, 19.08360399753829) 1 21.0 1389.93285614 (18.668181536495972, 9)
loss 2000.6678466796875
Current State,action,reward,Response time,Next State:  (9, 18.668181536495972) 2 21.0 1400.30471596 (18.375894992990247, 9)
loss 3080.065673828125
Current State,action,reward,Response time,Next State:  (9, 18.375894992990247) 3 20.0 1385.00495784 (17.82724819986867, 10)
loss 458.56365966796875
Current State,action,reward,Response time,Next State:  (10, 17.82724819986867) 0 22.0 1323.29060362 (17.229782241685768, 8)
loss 2359.043212890625
Current State,action,reward,Response time,Next State:  (8, 17.229782241685768) 4 20.0 1378.03457101 (16.84211602880065, 10)
loss 1384.43896484375
Action +2 not possible so Scaled up by 1
Current State,action,reward,Response time,Next State:  (10, 16.84211602880065) 4 19.0 1271.03516211 (16.237094554670044, 11)
loss 1287.2164306640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 1109.134765625
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 4 19.0 1210.97093797 (15.828704162850809, 11)
loss 1098.9378662109375
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 4 19.0 1204.52470225 (15.550833128512703, 11)
loss 2181.14501953125
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 0 21.0 1189.84140354 (15.446694946204717, 9)
loss 2288.399658203125
Current State,action,reward,Response time,Next State:  (9, 15.446694946204717) 2 21.0 1231.67579099 (15.750501603468638, 9)
loss 588.4456176757812
Current State,action,reward,Response time,Next State:  (9, 15.750501603468638) 3 20.0 1247.57857022 (15.817158911312735, 10)
loss 1363.8807373046875
Current State,action,reward,Response time,Next State:  (10, 15.817158911312735) 0 22.0 1216.66724247 (15.829956988360925, 8)
loss 1401.2266845703125
Current State,action,reward,Response time,Next State:  (8, 15.829956988360925) 1 23.0 1296.22207104 (15.892373986997768, 7)
loss 2413.821044921875
Current State,action,reward,Response time,Next State:  (7, 15.892373986997768) 2 23.0 1366.1899447 (15.954793861767499, 7)
loss 1113.281005859375
Current State,action,reward,Response time,Next State:  (7, 15.954793861767499) 3 22.0 1370.07974724 (16.004586266677634, 8)
loss 1138.7525634765625
Current State,action,reward,Response time,Next State:  (8, 16.004586266677634) 3 21.0 1306.42824342 (16.017694914042416, 9)
loss 1438.2940673828125
Current State,action,reward,Response time,Next State:  (9, 16.017694914042416) 2 21.0 1261.56482143 (15.947547279389703, 9)
loss 579.107666015625
Current State,action,reward,Response time,Next State:  (9, 15.947547279389703) 3 20.0 1257.89293893 (16.11465619633363, 10)
loss 2130.64892578125
Current State,action,reward,Response time,Next State:  (10, 16.11465619633363) 3 19.0 1232.44771583 (16.147078378791146, 11)
loss 763.0179443359375
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 0 21.0 1221.34827555 (16.229253414601111, 9)
loss 640.7080078125
Current State,action,reward,Response time,Next State:  (9, 16.229253414601111) 0 23.0 1272.63886489 (16.295120821876548, 7)
loss 2789.68017578125
Current State,action,reward,Response time,Next State:  (7, 16.295120821876548) 3 22.0 1391.28781087 (16.667936385136993, 8)
loss 574.8037109375
Current State,action,reward,Response time,Next State:  (8, 16.667936385136993) 1 23.0 1345.1976051 (16.836383524612351, 7)
loss 1556.970947265625
Current State,action,reward,Response time,Next State:  (7, 16.836383524612351) 1 24.0 1425.01753312 (16.845818065953559, 6)
loss 3306.399169921875
Current State,action,reward,Response time,Next State:  (6, 16.845818065953559) 3 23.0 1511.56621672 (17.052961248403161, 7)
loss 1479.9625244140625
Current State,action,reward,Response time,Next State:  (7, 17.052961248403161) 1 24.0 1438.51394887 (17.215992726625572, 6)
loss 1386.77099609375
############ Running episode number: 33  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 2 20.0 1029.06659875 (11.973514343585284, 10)
loss 1135.9393310546875
Current State,action,reward,Response time,Next State:  (10, 11.973514343585284) 0 22.0 1012.7846064 (11.786394321941378, 8)
loss 654.5684204101562
Current State,action,reward,Response time,Next State:  (8, 11.786394321941378) 4 20.0 1059.89687994 (11.61852219546234, 10)
loss 1155.1883544921875
Current State,action,reward,Response time,Next State:  (10, 11.61852219546234) 2 20.0 993.95437024 (11.469111876584304, 10)
loss 456.7582702636719
Current State,action,reward,Response time,Next State:  (10, 11.469111876584304) 3 19.0 986.02903554 (11.336751742492702, 11)
loss 1891.1448974609375
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 1 20.0 967.160346038 (11.25610796929319, 10)
loss 3928.970947265625
Current State,action,reward,Response time,Next State:  (10, 11.25610796929319) 3 19.0 974.730436685 (11.027107764209074, 11)
loss 1069.8116455078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 652.4635009765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 2264.828369140625
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 1 20.0 945.729803224 (10.816918347608043, 10)
loss 1559.154296875
Current State,action,reward,Response time,Next State:  (10, 10.816918347608043) 3 19.0 951.434021987 (10.819208572963639, 11)
loss 3558.4501953125
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 4 19.0 939.812260006 (10.768325938188134, 11)
loss 754.9996337890625
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 0 21.0 937.12351295 (10.772009508959538, 9)
loss 2544.252685546875
Current State,action,reward,Response time,Next State:  (9, 10.772009508959538) 4 19.0 986.979077927 (10.644925616761762, 11)
loss 684.1507568359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 2056.644775390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 2195.77294921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 2506.361572265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 1003.9278564453125
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 1 20.0 922.369964659 (10.448897752470936, 10)
loss 583.526611328125
Current State,action,reward,Response time,Next State:  (10, 10.448897752470936) 2 20.0 931.912703681 (10.433149880183072, 10)
loss 3056.135009765625
Current State,action,reward,Response time,Next State:  (10, 10.433149880183072) 3 19.0 931.077372094 (10.44185150623065, 11)
loss 1792.24560546875
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 4 19.0 919.871906942 (10.370942817486826, 11)
loss 3194.234130859375
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 2 19.0 916.124940439 (10.42733414151318, 11)
loss 530.5076293945312
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 1 20.0 919.104778396 (10.388469398680568, 10)
loss 2120.732421875
Current State,action,reward,Response time,Next State:  (10, 10.388469398680568) 3 19.0 928.707336523 (10.344006106602812, 11)
loss 3453.96630859375
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 4 19.0 914.701547126 (10.319026962956018, 11)
loss 1178.1544189453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 1620.891845703125
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 0 21.0 912.494916918 (10.278181486298042, 9)
loss 605.2777709960938
Current State,action,reward,Response time,Next State:  (9, 10.278181486298042) 4 19.0 961.129617982 (10.268274366284802, 11)
loss 2698.97412109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 2759.728271484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 2384.005615234375
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 1 20.0 912.67468196 (10.24826025489064, 10)
loss 2164.265869140625
Current State,action,reward,Response time,Next State:  (10, 10.24826025489064) 3 19.0 921.2700698 (10.276491935146446, 11)
loss 3702.229248046875
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 2 19.0 911.133954163 (10.236991269871366, 11)
loss 1327.19775390625
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 2 19.0 909.046654676 (10.236272697871373, 11)
loss 2520.548828125
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 4 19.0 909.008683798 (10.369891240151098, 11)
loss 431.3581237792969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 1813.71630859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 583.9967041015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 831.701416015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 1353.770263671875
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 0 21.0 919.032945938 (10.546025383098053, 9)
loss 1024.2393798828125
Current State,action,reward,Response time,Next State:  (9, 10.546025383098053) 3 20.0 975.14992417 (10.655373370049301, 10)
loss 1403.29443359375
Current State,action,reward,Response time,Next State:  (10, 10.655373370049301) 3 19.0 942.865015335 (10.624473674922116, 11)
loss 1877.9024658203125
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 2 19.0 929.522052234 (10.771376986314287, 11)
loss 2227.025634765625
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 0 21.0 937.284736847 (10.924797168745895, 9)
loss 479.2076721191406
Current State,action,reward,Response time,Next State:  (9, 10.924797168745895) 4 19.0 994.97675791 (11.039747673816453, 11)
loss 417.72161865234375
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 1 20.0 951.466016946 (11.271571944085663, 10)
loss 1151.232421875
Current State,action,reward,Response time,Next State:  (10, 11.271571944085663) 1 21.0 975.550709187 (11.670334358779868, 9)
loss 1724.5623779296875
Current State,action,reward,Response time,Next State:  (9, 11.670334358779868) 2 21.0 1034.00195058 (11.819721938468785, 9)
loss 593.1184692382812
Current State,action,reward,Response time,Next State:  (9, 11.819721938468785) 0 23.0 1041.82165315 (12.19918626616789, 7)
loss 3240.677734375
Current State,action,reward,Response time,Next State:  (7, 12.19918626616789) 4 21.0 1136.04255897 (12.501496275411796, 9)
loss 2215.08642578125
Current State,action,reward,Response time,Next State:  (9, 12.501496275411796) 1 22.0 1077.50917513 (13.168618569876575, 8)
loss 539.4305419921875
Current State,action,reward,Response time,Next State:  (8, 13.168618569876575) 3 21.0 1140.68069275 (13.649658108197247, 9)
loss 1371.5157470703125
Current State,action,reward,Response time,Next State:  (9, 13.649658108197247) 4 19.0 1137.6097809 (14.283719188889453, 11)
loss 1317.364990234375
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 4 19.0 1122.88439768 (14.677479537099185, 11)
loss 1763.5538330078125
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 2 19.0 1143.69153516 (15.353965082180355, 11)
loss 1293.7852783203125
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 2 19.0 1179.43847566 (15.836943704090487, 11)
loss 1923.5977783203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 2858.0771484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 1274.776611328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 796.1480102539062
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 502.840576171875
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 1 20.0 1301.78496219 (17.944480812078613, 10)
loss 861.1109619140625
Action +2 not possible so Scaled up by 1
Current State,action,reward,Response time,Next State:  (10, 17.944480812078613) 4 19.0 1329.50910109 (18.385807405229915, 11)
loss 456.1748046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 471.47808837890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 759.1551513671875
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 2 19.0 1373.60319427 (19.286321916040979, 11)
loss 714.7526245117188
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 2627.234375
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 2 19.0 1390.09363446 (19.213467265587269, 11)
loss 1082.7039794921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 435.2175598144531
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 4 19.0 1379.54110953 (19.385636054792762, 11)
loss 448.6246337890625
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 4 19.0 1392.48057747 (19.223969507401588, 11)
loss 2742.331298828125
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 2 19.0 1383.93777195 (19.25591252280865, 11)
loss 474.33819580078125
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 1 20.0 1385.62570908 (19.08360399753829, 10)
loss 470.02984619140625
Current State,action,reward,Response time,Next State:  (10, 19.08360399753829) 3 19.0 1389.93285614 (18.668181536495972, 11)
loss 699.5098266601562
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 1648.381103515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 1059.7900390625
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 4 19.0 1310.13203606 (17.229782241685768, 11)
loss 524.785888671875
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 4 19.0 1278.56065924 (16.84211602880065, 11)
loss 721.7568359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 2582.586669921875
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 4 19.0 1226.10492247 (15.950694610794756, 11)
loss 1852.89599609375
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 0 21.0 1210.97093797 (15.828704162850809, 9)
loss 434.8673400878906
Current State,action,reward,Response time,Next State:  (9, 15.828704162850809) 2 21.0 1251.67208827 (15.550833128512703, 9)
loss 1750.0728759765625
Current State,action,reward,Response time,Next State:  (9, 15.550833128512703) 0 23.0 1237.12691092 (15.446694946204717, 7)
loss 850.305419921875
Current State,action,reward,Response time,Next State:  (7, 15.446694946204717) 4 21.0 1338.4166838 (15.750501603468638, 9)
loss 2461.46435546875
Current State,action,reward,Response time,Next State:  (9, 15.750501603468638) 4 19.0 1247.57857022 (15.817158911312735, 11)
loss 1211.0697021484375
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 4 19.0 1203.91462651 (15.829956988360925, 11)
loss 2103.6025390625
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 2 19.0 1204.59090422 (15.892373986997768, 11)
loss 1057.7960205078125
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 1 20.0 1207.88915169 (15.954793861767499, 10)
loss 1478.8516845703125
Current State,action,reward,Response time,Next State:  (10, 15.954793861767499) 2 20.0 1223.96796344 (16.004586266677634, 10)
loss 1379.8836669921875
Current State,action,reward,Response time,Next State:  (10, 16.004586266677634) 1 21.0 1226.60915635 (16.017694914042416, 9)
loss 1157.1942138671875
Current State,action,reward,Response time,Next State:  (9, 16.017694914042416) 3 20.0 1261.56482143 (15.947547279389703, 10)
loss 415.0988464355469
Current State,action,reward,Response time,Next State:  (10, 15.947547279389703) 1 21.0 1223.58357506 (16.11465619633363, 9)
loss 1528.9716796875
Current State,action,reward,Response time,Next State:  (9, 16.11465619633363) 3 20.0 1266.64026605 (16.147078378791146, 10)
loss 1599.562255859375
Current State,action,reward,Response time,Next State:  (10, 16.147078378791146) 3 19.0 1234.16752106 (16.229253414601111, 11)
loss 1386.7982177734375
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 2 19.0 1225.69057988 (16.295120821876548, 11)
loss 523.8048706054688
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 0 21.0 1229.17115431 (16.667936385136993, 9)
loss 1638.86669921875
Current State,action,reward,Response time,Next State:  (9, 16.667936385136993) 2 21.0 1295.6017535 (16.836383524612351, 9)
loss 626.593017578125
Current State,action,reward,Response time,Next State:  (9, 16.836383524612351) 3 20.0 1304.41912996 (16.845818065953559, 10)
loss 2980.793212890625
Current State,action,reward,Response time,Next State:  (10, 16.845818065953559) 1 21.0 1271.23153331 (17.052961248403161, 9)
loss 495.31256103515625
Current State,action,reward,Response time,Next State:  (9, 17.052961248403161) 4 19.0 1315.75590499 (17.215992726625572, 11)
loss 1867.02880859375
############ Running episode number: 34  ##############
Action +2 not possible so Scaled up by 1
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 4 19.0 1029.06659875 (11.973514343585284, 11)
loss 3722.90283203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 2518.94189453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 1761.296875
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 0 21.0 982.049698353 (11.469111876584304, 9)
loss 479.889892578125
Current State,action,reward,Response time,Next State:  (9, 11.469111876584304) 0 23.0 1023.46894667 (11.336751742492702, 7)
loss 527.1383056640625
Current State,action,reward,Response time,Next State:  (7, 11.336751742492702) 0 25.0 1082.29845875 (11.25610796929319, 5)
loss 1853.86572265625
Current State,action,reward,Response time,Next State:  (5, 11.25610796929319) 2 25.0 1291.80100003 (11.027107764209074, 5)
loss 1365.2752685546875
Current State,action,reward,Response time,Next State:  (5, 11.027107764209074) 3 24.0 1270.75995258 (10.995673623987257, 6)
loss 1402.2484130859375
Current State,action,reward,Response time,Next State:  (6, 10.995673623987257) 0 26.0 1124.64289336 (10.931193889570471, 4)
loss 1471.6107177734375
Current State,action,reward,Response time,Next State:  (4, 10.931193889570471) 4 24.0 1426.9014838 (10.816918347608043, 6)
loss 733.8344116210938
Current State,action,reward,Response time,Next State:  (6, 10.816918347608043) 3 23.0 1112.82017919 (10.819208572963639, 7)
loss 2437.300537109375
Current State,action,reward,Response time,Next State:  (7, 10.819208572963639) 4 21.0 1050.04686027 (10.768325938188134, 9)
loss 1219.5504150390625
Current State,action,reward,Response time,Next State:  (9, 10.768325938188134) 3 20.0 986.786261176 (10.772009508959538, 10)
loss 716.1954345703125
Action +2 not possible so Scaled up by 1
Current State,action,reward,Response time,Next State:  (10, 10.772009508959538) 4 19.0 949.051873418 (10.644925616761762, 11)
loss 621.4801635742188
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 0 21.0 930.602776506 (10.58735855349979, 9)
loss 432.9218444824219
Current State,action,reward,Response time,Next State:  (9, 10.58735855349979) 3 20.0 977.313511661 (10.552868829802469, 10)
loss 546.4868774414062
Current State,action,reward,Response time,Next State:  (10, 10.552868829802469) 3 19.0 937.427755072 (10.553846649940214, 11)
loss 774.35302734375
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 2 19.0 925.789969445 (10.489125480251131, 11)
loss 1432.71923828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 1594.6829833984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 629.781005859375
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 4 19.0 919.412094444 (10.44185150623065, 11)
loss 1215.01611328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 634.7631225585938
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 498.8241271972656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 3677.31689453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 3042.084716796875
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 2 19.0 914.701547126 (10.319026962956018, 11)
loss 2215.68994140625
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 4 19.0 913.381595845 (10.30224719189987, 11)
loss 847.4124145507812
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 582.1121826171875
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 2 19.0 911.223233653 (10.268274366284802, 11)
loss 460.83966064453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 518.5889892578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 2969.011962890625
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 2 19.0 912.67468196 (10.24826025489064, 11)
loss 771.0303955078125
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 0 21.0 909.642131904 (10.276491935146446, 9)
loss 1271.3487548828125
Current State,action,reward,Response time,Next State:  (9, 10.276491935146446) 3 20.0 961.041178317 (10.236991269871366, 10)
loss 1641.330078125
Current State,action,reward,Response time,Next State:  (10, 10.236991269871366) 0 22.0 920.672316722 (10.236272697871373, 8)
loss 1472.5064697265625
Current State,action,reward,Response time,Next State:  (8, 10.236272697871373) 1 23.0 969.300339391 (10.369891240151098, 7)
loss 1225.4080810546875
Current State,action,reward,Response time,Next State:  (7, 10.369891240151098) 0 25.0 1022.04687291 (10.316955310454549, 5)
loss 1200.135009765625
Current State,action,reward,Response time,Next State:  (5, 10.316955310454549) 0 27.0 1205.50956788 (10.333617326102203, 3)
loss 1851.3046875
Action -2 not possible so Scaled down by 1
Current State,action,reward,Response time,Next State:  (3, 10.333617326102203) 0 -61.2347599131 1932.74854487 (10.390165524255663, 2)
loss 1885.0281982421875
Current State,action,reward,Response time,Next State:  (2, 10.390165524255663) 3 27.0 2892.34759913 (10.425974763084863, 3)
loss 2296.761962890625
Current State,action,reward,Response time,Next State:  (3, 10.425974763084863) 3 26.0 1950.9125956 (10.546025383098053, 4)
loss 5198.39453125
Current State,action,reward,Response time,Next State:  (4, 10.546025383098053) 2 26.0 1383.14465057 (10.655373370049301, 4)
loss 1888.87939453125
Current State,action,reward,Response time,Next State:  (4, 10.655373370049301) 3 25.0 1395.56706193 (10.624473674922116, 5)
loss 2034.928955078125
Current State,action,reward,Response time,Next State:  (5, 10.624473674922116) 1 26.0 1233.76503821 (10.771376986314287, 4)
loss 1981.7501220703125
Current State,action,reward,Response time,Next State:  (4, 10.771376986314287) 1 22.0983388322 1408.74558199 (10.924797168745895, 3)
loss 431.5962219238281
Action -2 not possible so Scaled down by 1
Current State,action,reward,Response time,Next State:  (3, 10.924797168745895) 0 -81.4356611244 2049.01661168 (11.039747673816453, 2)
loss 3122.952880859375
Action -2 not possible so Scaled down by 0
Current State,action,reward,Response time,Next State:  (2, 11.039747673816453) 0 -88.6450020603 3094.35661124 (11.271571944085663, 2)
loss 737.2251586914062
Current State,action,reward,Response time,Next State:  (2, 11.271571944085663) 3 7.43576721475 3166.4500206 (11.670334358779868, 3)
loss 1139.197265625
Current State,action,reward,Response time,Next State:  (3, 11.670334358779868) 3 26.0 2195.64232785 (11.819721938468785, 4)
loss 3637.9716796875
Current State,action,reward,Response time,Next State:  (4, 11.819721938468785) 1 -2.96522829663 1527.84217079 (12.19918626616789, 3)
loss 2038.83447265625
Current State,action,reward,Response time,Next State:  (3, 12.19918626616789) 1 -126.893559123 2299.65228297 (12.501496275411796, 2)
loss 1538.8695068359375
Action -2 not possible so Scaled down by 0
Current State,action,reward,Response time,Next State:  (2, 12.501496275411796) 1 -147.63992904 3548.93559123 (13.168618569876575, 2)
loss 2141.762451171875
Current State,action,reward,Response time,Next State:  (2, 13.168618569876575) 2 -162.599441396 3756.3992904 (13.649658108197247, 2)
loss 869.2021484375
Action -2 not possible so Scaled down by 0
Current State,action,reward,Response time,Next State:  (2, 13.649658108197247) 1 -182.317663736 3905.99441396 (14.283719188889453, 2)
loss 707.1282958984375
Current State,action,reward,Response time,Next State:  (2, 14.283719188889453) 4 26.0 4103.17663736 (14.677479537099185, 4)
loss 2116.393310546875
Current State,action,reward,Response time,Next State:  (4, 14.677479537099185) 3 25.0 1852.49597955 (15.353965082180355, 5)
loss 650.0062866210938
Current State,action,reward,Response time,Next State:  (5, 15.353965082180355) 2 25.0 1668.32121291 (15.836943704090487, 5)
loss 804.3534545898438
Current State,action,reward,Response time,Next State:  (5, 15.836943704090487) 0 -86.8984248291 1712.69836138 (16.466876895473597, 3)
loss 929.3858032226562
Current State,action,reward,Response time,Next State:  (3, 16.466876895473597) 1 -262.796551697 3138.98424829 (16.871606159345866, 2)
loss 1884.1962890625
Action -2 not possible so Scaled down by 0
Current State,action,reward,Response time,Next State:  (2, 16.871606159345866) 0 -283.425965015 4907.96551697 (17.534967586021782, 2)
loss 1937.53759765625
Current State,action,reward,Response time,Next State:  (2, 17.534967586021782) 3 -110.546347383 5114.25965015 (17.669285735563751, 3)
loss 1971.5733642578125
Current State,action,reward,Response time,Next State:  (3, 17.669285735563751) 3 3.63583334771 3375.46347383 (17.944480812078613, 4)
loss 544.5803833007812
Current State,action,reward,Response time,Next State:  (4, 17.944480812078613) 4 24.0 2223.64166652 (18.385807405229915, 6)
loss 2026.0306396484375
Current State,action,reward,Response time,Next State:  (6, 18.385807405229915) 3 23.0 1613.41973487 (18.671267839956315, 7)
loss 1748.0655517578125
Current State,action,reward,Response time,Next State:  (7, 18.671267839956315) 0 24.4064010816 1539.36152541 (19.02839494033929, 5)
loss 2835.296142578125
Current State,action,reward,Response time,Next State:  (5, 19.02839494033929) 0 -142.34879684 2005.93598918 (19.286321916040979, 3)
loss 1327.76513671875
Current State,action,reward,Response time,Next State:  (3, 19.286321916040979) 3 -12.2231581375 3693.4879684 (19.340464848017284, 4)
loss 1647.6661376953125
Current State,action,reward,Response time,Next State:  (4, 19.340464848017284) 0 -335.624453279 2382.23158138 (19.213467265587269, 2)
loss 1998.036376953125
Current State,action,reward,Response time,Next State:  (2, 19.213467265587269) 3 -139.486126476 5636.24453279 (19.140765783401285, 3)
loss 642.5346069335938
Current State,action,reward,Response time,Next State:  (3, 19.140765783401285) 2 -144.302020223 3664.86126476 (19.385636054792762, 3)
loss 1195.37548828125
Action -2 not possible so Scaled down by 1
Current State,action,reward,Response time,Next State:  (3, 19.385636054792762) 0 -335.951055156 3713.02020223 (19.223969507401588, 2)
loss 3109.60205078125
Current State,action,reward,Response time,Next State:  (2, 19.223969507401588) 3 -141.750731563 5639.51055156 (19.25591252280865, 3)
loss 479.8423156738281
Current State,action,reward,Response time,Next State:  (3, 19.25591252280865) 3 -9.30510611419 3687.50731563 (19.08360399753829, 4)
loss 1584.814208984375
Current State,action,reward,Response time,Next State:  (4, 19.08360399753829) 4 24.0 2353.05106114 (18.668181536495972, 6)
loss 1233.23828125
Current State,action,reward,Response time,Next State:  (6, 18.668181536495972) 4 22.0 1632.09570747 (18.375894992990247, 8)
loss 975.9542846679688
Current State,action,reward,Response time,Next State:  (8, 18.375894992990247) 3 21.0 1445.01889581 (17.82724819986867, 9)
loss 1120.2264404296875
Current State,action,reward,Response time,Next State:  (9, 17.82724819986867) 3 20.0 1356.28600579 (17.229782241685768, 10)
loss 528.6206665039062
Current State,action,reward,Response time,Next State:  (10, 17.229782241685768) 0 22.0 1291.59856437 (16.84211602880065, 8)
loss 1725.579833984375
Current State,action,reward,Response time,Next State:  (8, 16.84211602880065) 4 20.0 1355.37749867 (16.237094554670044, 10)
loss 649.7054443359375
Current State,action,reward,Response time,Next State:  (10, 16.237094554670044) 1 21.0 1238.94234737 (15.950694610794756, 9)
loss 1836.8531494140625
Current State,action,reward,Response time,Next State:  (9, 15.950694610794756) 1 22.0 1258.0576862 (15.828704162850809, 8)
loss 1713.8095703125
Current State,action,reward,Response time,Next State:  (8, 15.828704162850809) 1 23.0 1296.14884991 (15.550833128512703, 7)
loss 3388.281494140625
Current State,action,reward,Response time,Next State:  (7, 15.550833128512703) 1 24.0 1344.9062349 (15.446694946204717, 6)
loss 632.50537109375
Current State,action,reward,Response time,Next State:  (6, 15.446694946204717) 1 25.0 1419.0294644 (15.750501603468638, 5)
loss 2827.45556640625
Current State,action,reward,Response time,Next State:  (5, 15.750501603468638) 3 24.0 1704.75586919 (15.817158911312735, 6)
loss 675.8111572265625
Current State,action,reward,Response time,Next State:  (6, 15.817158911312735) 3 23.0 1443.53161985 (15.829956988360925, 7)
loss 3140.82421875
Current State,action,reward,Response time,Next State:  (7, 15.829956988360925) 3 22.0 1362.30032139 (15.892373986997768, 8)
loss 2115.592529296875
Current State,action,reward,Response time,Next State:  (8, 15.892373986997768) 2 22.0 1299.87001973 (15.954793861767499, 8)
loss 1677.685302734375
Current State,action,reward,Response time,Next State:  (8, 15.954793861767499) 3 21.0 1303.51813652 (16.004586266677634, 9)
loss 3167.007080078125
Current State,action,reward,Response time,Next State:  (9, 16.004586266677634) 3 20.0 1260.87864843 (16.017694914042416, 10)
loss 1255.14990234375
Current State,action,reward,Response time,Next State:  (10, 16.017694914042416) 1 21.0 1227.30449265 (15.947547279389703, 9)
loss 1151.4918212890625
Current State,action,reward,Response time,Next State:  (9, 15.947547279389703) 3 20.0 1257.89293893 (16.11465619633363, 10)
loss 2450.091796875
Current State,action,reward,Response time,Next State:  (10, 16.11465619633363) 3 19.0 1232.44771583 (16.147078378791146, 11)
loss 650.6813354492188
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 3392.365234375
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 1 20.0 1225.69057988 (16.295120821876548, 10)
loss 3551.0517578125
Current State,action,reward,Response time,Next State:  (10, 16.295120821876548) 3 19.0 1242.02029803 (16.667936385136993, 11)
loss 417.21551513671875
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 4 19.0 1248.87152463 (16.836383524612351, 11)
loss 2116.853271484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 2295.78662109375
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 1 20.0 1258.27117243 (17.052961248403161, 10)
loss 1734.586669921875
Current State,action,reward,Response time,Next State:  (10, 17.052961248403161) 2 20.0 1282.21925533 (17.215992726625572, 10)
loss 760.2991333007812
############ Running episode number: 35  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 1 21.0 1029.06659875 (11.973514343585284, 9)
loss 1975.7645263671875
Current State,action,reward,Response time,Next State:  (9, 11.973514343585284) 3 20.0 1049.87192659 (11.786394321941378, 10)
loss 1757.8291015625
Current State,action,reward,Response time,Next State:  (10, 11.786394321941378) 0 22.0 1002.85899476 (11.61852219546234, 8)
loss 2531.12158203125
Current State,action,reward,Response time,Next State:  (8, 11.61852219546234) 0 24.0 1050.08562792 (11.469111876584304, 6)
loss 3021.400390625
Current State,action,reward,Response time,Next State:  (6, 11.469111876584304) 3 23.0 1155.95567613 (11.336751742492702, 7)
loss 2316.62841796875
Current State,action,reward,Response time,Next State:  (7, 11.336751742492702) 3 22.0 1082.29845875 (11.25610796929319, 8)
loss 2763.485107421875
Current State,action,reward,Response time,Next State:  (8, 11.25610796929319) 1 23.0 1028.90440276 (11.027107764209074, 7)
loss 2578.385986328125
Current State,action,reward,Response time,Next State:  (7, 11.027107764209074) 1 24.0 1063.00245825 (10.995673623987257, 6)
loss 1876.1163330078125
Current State,action,reward,Response time,Next State:  (6, 10.995673623987257) 2 24.0 1124.64289336 (10.931193889570471, 6)
loss 1901.9034423828125
Current State,action,reward,Response time,Next State:  (6, 10.931193889570471) 3 23.0 1120.37826137 (10.816918347608043, 7)
loss 480.49835205078125
Current State,action,reward,Response time,Next State:  (7, 10.816918347608043) 3 22.0 1049.90414092 (10.819208572963639, 8)
loss 2077.02294921875
Current State,action,reward,Response time,Next State:  (8, 10.819208572963639) 3 21.0 1003.36990711 (10.768325938188134, 9)
loss 492.7232360839844
Current State,action,reward,Response time,Next State:  (9, 10.768325938188134) 3 20.0 986.786261176 (10.772009508959538, 10)
loss 1955.78369140625
Current State,action,reward,Response time,Next State:  (10, 10.772009508959538) 3 19.0 949.051873418 (10.644925616761762, 11)
loss 1770.890869140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 3326.049072265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 1822.028076171875
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 4 19.0 925.738299342 (10.553846649940214, 11)
loss 3641.04833984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 2084.74560546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 2218.66845703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 2054.828857421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 707.1453247070312
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 1178.159423828125
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 4 19.0 916.124940439 (10.42733414151318, 11)
loss 702.9686279296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 836.575927734375
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 1 20.0 917.051082408 (10.344006106602812, 10)
loss 752.4363403320312
Action +2 not possible so Scaled up by 1
Current State,action,reward,Response time,Next State:  (10, 10.344006106602812) 4 19.0 926.348821567 (10.319026962956018, 11)
loss 1749.6103515625
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 0 21.0 913.381595845 (10.30224719189987, 9)
loss 755.7135009765625
Current State,action,reward,Response time,Next State:  (9, 10.30224719189987) 3 20.0 962.389338906 (10.278181486298042, 10)
loss 1662.55712890625
Current State,action,reward,Response time,Next State:  (10, 10.278181486298042) 3 19.0 922.857214352 (10.268274366284802, 11)
loss 875.4076538085938
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 2 19.0 910.69972028 (10.335411397720526, 11)
loss 3991.854736328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 2344.191650390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 1061.4412841796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 2254.606689453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 2579.524169921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 790.7914428710938
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 3390.564208984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 964.747802734375
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 2 19.0 913.272125304 (10.333617326102203, 11)
loss 1989.576904296875
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 4 19.0 914.152581784 (10.390165524255663, 11)
loss 985.3878784179688
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 0 21.0 917.140709305 (10.425974763084863, 9)
loss 4062.6435546875
Current State,action,reward,Response time,Next State:  (9, 10.425974763084863) 4 19.0 968.865866662 (10.546025383098053, 11)
loss 3010.6962890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 1010.2015991210938
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 2 19.0 931.154858096 (10.624473674922116, 11)
loss 1078.2103271484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 1313.495361328125
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 0 21.0 937.284736847 (10.924797168745895, 9)
loss 2067.15234375
Current State,action,reward,Response time,Next State:  (9, 10.924797168745895) 3 20.0 994.97675791 (11.039747673816453, 10)
loss 824.8477172851562
Current State,action,reward,Response time,Next State:  (10, 11.039747673816453) 3 19.0 963.253801267 (11.271571944085663, 11)
loss 3051.169921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 1931.848876953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 1811.0428466796875
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 0 21.0 992.681522335 (12.19918626616789, 9)
loss 1569.4822998046875
Current State,action,reward,Response time,Next State:  (9, 12.19918626616789) 3 20.0 1061.68473805 (12.501496275411796, 10)
loss 502.08807373046875
Action +2 not possible so Scaled up by 1
Current State,action,reward,Response time,Next State:  (10, 12.501496275411796) 4 19.0 1040.79092857 (13.168618569876575, 11)
loss 2693.43994140625
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 2 19.0 1063.96010023 (13.649658108197247, 11)
loss 1081.399658203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 2443.298828125
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 2 19.0 1122.88439768 (14.677479537099185, 11)
loss 3897.729248046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 1235.20703125
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 0 21.0 1179.43847566 (15.836943704090487, 9)
loss 1879.6143798828125
Current State,action,reward,Response time,Next State:  (9, 15.836943704090487) 3 20.0 1252.10338759 (16.466876895473597, 10)
loss 2538.1787109375
Current State,action,reward,Response time,Next State:  (10, 16.466876895473597) 1 21.0 1251.130943 (16.871606159345866, 9)
loss 2925.007568359375
Current State,action,reward,Response time,Next State:  (9, 16.871606159345866) 1 22.0 1306.26286107 (17.534967586021782, 8)
loss 1391.4541015625
Current State,action,reward,Response time,Next State:  (8, 17.534967586021782) 4 20.0 1395.8710659 (17.669285735563751, 10)
loss 1587.5533447265625
Current State,action,reward,Response time,Next State:  (10, 17.669285735563751) 3 19.0 1314.91162813 (17.944480812078613, 11)
loss 675.1143798828125
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 2 19.0 1316.32685758 (18.385807405229915, 11)
loss 768.4500122070312
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 2796.013916015625
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 0 21.0 1354.73183582 (19.02839494033929, 9)
loss 744.6978759765625
Current State,action,reward,Response time,Next State:  (9, 19.02839494033929) 1 22.0 1419.16011 (19.286321916040979, 8)
loss 1397.61376953125
Current State,action,reward,Response time,Next State:  (8, 19.286321916040979) 4 20.0 1498.22861069 (19.340464848017284, 10)
loss 2137.828857421875
Current State,action,reward,Response time,Next State:  (10, 19.340464848017284) 1 21.0 1403.55780672 (19.213467265587269, 9)
loss 2913.776611328125
Current State,action,reward,Response time,Next State:  (9, 19.213467265587269) 4 19.0 1428.84773289 (19.140765783401285, 11)
loss 683.56787109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 659.9530639648438
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 1324.9632568359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 1993.7999267578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 2411.84716796875
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 4 19.0 1376.52055872 (18.668181536495972, 11)
loss 1201.108642578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 3050.8193359375
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 4 19.0 1339.12370397 (17.82724819986867, 11)
loss 590.7481689453125
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 4 19.0 1310.13203606 (17.229782241685768, 11)
loss 442.17645263671875
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 2 19.0 1278.56065924 (16.84211602880065, 11)
loss 516.30126953125
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 2 19.0 1258.07554888 (16.237094554670044, 11)
loss 1113.0677490234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 2423.0546875
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 4 19.0 1210.97093797 (15.828704162850809, 11)
loss 1584.4132080078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 1392.662353515625
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 2 19.0 1189.84140354 (15.446694946204717, 11)
loss 2645.916259765625
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 0 21.0 1184.33851965 (15.750501603468638, 9)
loss 688.7349243164062
Current State,action,reward,Response time,Next State:  (9, 15.750501603468638) 3 20.0 1247.57857022 (15.817158911312735, 10)
loss 2743.48681640625
Current State,action,reward,Response time,Next State:  (10, 15.817158911312735) 3 19.0 1216.66724247 (15.829956988360925, 11)
loss 1204.2117919921875
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 4 19.0 1204.59090422 (15.892373986997768, 11)
loss 3622.330322265625
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 4 19.0 1207.88915169 (15.954793861767499, 11)
loss 2345.99609375
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 0 21.0 1211.18755114 (16.004586266677634, 9)
loss 2151.125732421875
Current State,action,reward,Response time,Next State:  (9, 16.004586266677634) 3 20.0 1260.87864843 (16.017694914042416, 10)
loss 3000.358154296875
Current State,action,reward,Response time,Next State:  (10, 16.017694914042416) 0 22.0 1227.30449265 (15.947547279389703, 8)
loss 1561.3624267578125
Current State,action,reward,Response time,Next State:  (8, 15.947547279389703) 2 22.0 1303.0946115 (16.11465619633363, 8)
loss 708.1585693359375
Current State,action,reward,Response time,Next State:  (8, 16.11465619633363) 0 24.0 1312.86125789 (16.147078378791146, 6)
loss 1153.447998046875
Current State,action,reward,Response time,Next State:  (6, 16.147078378791146) 3 23.0 1465.35219849 (16.229253414601111, 7)
loss 2112.3564453125
Current State,action,reward,Response time,Next State:  (7, 16.229253414601111) 2 23.0 1387.18316937 (16.295120821876548, 7)
loss 2342.749755859375
Current State,action,reward,Response time,Next State:  (7, 16.295120821876548) 3 22.0 1391.28781087 (16.667936385136993, 8)
loss 1060.4556884765625
Current State,action,reward,Response time,Next State:  (8, 16.667936385136993) 3 21.0 1345.1976051 (16.836383524612351, 9)
loss 3205.150146484375
Current State,action,reward,Response time,Next State:  (9, 16.836383524612351) 3 20.0 1304.41912996 (16.845818065953559, 10)
loss 982.0100708007812
Action +2 not possible so Scaled up by 1
Current State,action,reward,Response time,Next State:  (10, 16.845818065953559) 4 19.0 1271.23153331 (17.052961248403161, 11)
loss 1865.423583984375
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 2 19.0 1269.21706044 (17.215992726625572, 11)
loss 1187.7052001953125
############ Running episode number: 36  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 2 20.0 1029.06659875 (11.973514343585284, 10)
loss 2352.08154296875
Current State,action,reward,Response time,Next State:  (10, 11.973514343585284) 3 19.0 1012.7846064 (11.786394321941378, 11)
loss 1194.9422607421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 1341.6131591796875
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 0 21.0 982.049698353 (11.469111876584304, 9)
loss 745.4505615234375
Current State,action,reward,Response time,Next State:  (9, 11.469111876584304) 1 22.0 1023.46894667 (11.336751742492702, 8)
loss 2206.357666015625
Current State,action,reward,Response time,Next State:  (8, 11.336751742492702) 4 20.0 1033.61761156 (11.25610796929319, 10)
loss 1480.28564453125
Current State,action,reward,Response time,Next State:  (10, 11.25610796929319) 3 19.0 974.730436685 (11.027107764209074, 11)
loss 839.5541381835938
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 769.520751953125
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 1 20.0 949.137050055 (10.931193889570471, 10)
loss 486.18756103515625
Current State,action,reward,Response time,Next State:  (10, 10.931193889570471) 0 22.0 957.495664348 (10.816918347608043, 8)
loss 2041.281982421875
Current State,action,reward,Response time,Next State:  (8, 10.816918347608043) 4 20.0 1003.23605536 (10.819208572963639, 10)
loss 892.0355834960938
Action +2 not possible so Scaled up by 1
Current State,action,reward,Response time,Next State:  (10, 10.819208572963639) 4 19.0 951.555504911 (10.768325938188134, 11)
loss 929.9733276367188
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 916.3236083984375
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 4 19.0 937.318160694 (10.644925616761762, 11)
loss 2297.2568359375
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 4 19.0 930.602776506 (10.58735855349979, 11)
loss 1474.7359619140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 676.248291015625
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 1 20.0 925.738299342 (10.553846649940214, 10)
loss 1488.9373779296875
Current State,action,reward,Response time,Next State:  (10, 10.553846649940214) 2 20.0 937.479622653 (10.489125480251131, 10)
loss 1685.175537109375
Current State,action,reward,Response time,Next State:  (10, 10.489125480251131) 2 20.0 934.046546974 (10.448897752470936, 10)
loss 1797.890380859375
Current State,action,reward,Response time,Next State:  (10, 10.448897752470936) 2 20.0 931.912703681 (10.433149880183072, 10)
loss 493.11151123046875
Current State,action,reward,Response time,Next State:  (10, 10.433149880183072) 3 19.0 931.077372094 (10.44185150623065, 11)
loss 1040.2515869140625
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 4 19.0 919.871906942 (10.370942817486826, 11)
loss 3059.332763671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 545.193115234375
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 1 20.0 919.104778396 (10.388469398680568, 10)
loss 1673.3472900390625
Current State,action,reward,Response time,Next State:  (10, 10.388469398680568) 3 19.0 928.707336523 (10.344006106602812, 11)
loss 1527.749267578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 2804.4384765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 1985.0252685546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 769.2188720703125
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 4 19.0 911.223233653 (10.268274366284802, 11)
loss 2663.992431640625
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 2 19.0 910.69972028 (10.335411397720526, 11)
loss 3085.200927734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 2952.49072265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 2476.20849609375
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 2 19.0 909.642131904 (10.276491935146446, 11)
loss 3326.271240234375
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 0 21.0 911.133954163 (10.236991269871366, 9)
loss 4349.11279296875
Current State,action,reward,Response time,Next State:  (9, 10.236991269871366) 0 23.0 958.973513426 (10.236272697871373, 7)
loss 2661.901611328125
Current State,action,reward,Response time,Next State:  (7, 10.236272697871373) 2 23.0 1013.72020216 (10.369891240151098, 7)
loss 2221.364013671875
Current State,action,reward,Response time,Next State:  (7, 10.369891240151098) 1 24.0 1022.04687291 (10.316955310454549, 6)
loss 2983.03466796875
Current State,action,reward,Response time,Next State:  (6, 10.316955310454549) 4 22.0 1079.75307088 (10.333617326102203, 8)
loss 1739.5592041015625
Current State,action,reward,Response time,Next State:  (8, 10.333617326102203) 3 21.0 974.989626232 (10.390165524255663, 9)
loss 770.25537109375
Current State,action,reward,Response time,Next State:  (9, 10.390165524255663) 0 23.0 966.991429728 (10.425974763084863, 7)
loss 2797.7685546875
Current State,action,reward,Response time,Next State:  (7, 10.425974763084863) 4 21.0 1025.54181472 (10.546025383098053, 9)
loss 3054.273193359375
Current State,action,reward,Response time,Next State:  (9, 10.546025383098053) 3 20.0 975.14992417 (10.655373370049301, 10)
loss 1750.04150390625
Current State,action,reward,Response time,Next State:  (10, 10.655373370049301) 1 21.0 942.865015335 (10.624473674922116, 9)
loss 514.6297607421875
Current State,action,reward,Response time,Next State:  (9, 10.624473674922116) 3 20.0 979.256305105 (10.771376986314287, 10)
loss 668.77001953125
Current State,action,reward,Response time,Next State:  (10, 10.771376986314287) 2 20.0 949.018321829 (10.924797168745895, 10)
loss 2108.50048828125
Current State,action,reward,Response time,Next State:  (10, 10.924797168745895) 0 22.0 957.1563561 (11.039747673816453, 8)
loss 588.7051391601562
Current State,action,reward,Response time,Next State:  (8, 11.039747673816453) 3 21.0 1016.25926965 (11.271571944085663, 9)
loss 1192.8570556640625
Current State,action,reward,Response time,Next State:  (9, 11.271571944085663) 3 20.0 1013.12870607 (11.670334358779868, 10)
loss 2434.29638671875
Current State,action,reward,Response time,Next State:  (10, 11.670334358779868) 1 21.0 996.702699398 (11.819721938468785, 9)
loss 2568.73388671875
Current State,action,reward,Response time,Next State:  (9, 11.819721938468785) 1 22.0 1041.82165315 (12.19918626616789, 8)
loss 1268.2349853515625
Current State,action,reward,Response time,Next State:  (8, 12.19918626616789) 0 24.0 1084.02242049 (12.501496275411796, 6)
loss 1572.4837646484375
Current State,action,reward,Response time,Next State:  (6, 12.501496275411796) 3 23.0 1224.23665732 (13.168618569876575, 7)
loss 727.5354614257812
Current State,action,reward,Response time,Next State:  (7, 13.168618569876575) 3 22.0 1196.45441106 (13.649658108197247, 8)
loss 952.7919311523438
Current State,action,reward,Response time,Next State:  (8, 13.649658108197247) 4 20.0 1168.79494995 (14.283719188889453, 10)
loss 1836.517822265625
Action +2 not possible so Scaled up by 1
Current State,action,reward,Response time,Next State:  (10, 14.283719188889453) 4 19.0 1135.32732476 (14.677479537099185, 11)
loss 1297.192138671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 3097.973388671875
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 0 21.0 1179.43847566 (15.836943704090487, 9)
loss 1736.33251953125
Current State,action,reward,Response time,Next State:  (9, 15.836943704090487) 3 20.0 1252.10338759 (16.466876895473597, 10)
loss 1318.4320068359375
Current State,action,reward,Response time,Next State:  (10, 16.466876895473597) 3 19.0 1251.130943 (16.871606159345866, 11)
loss 3285.2734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 1537.7965087890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 1578.30712890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 1649.8460693359375
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 2 19.0 1316.32685758 (18.385807405229915, 11)
loss 468.32635498046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 947.7688598632812
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 0 21.0 1354.73183582 (19.02839494033929, 9)
loss 1085.5908203125
Current State,action,reward,Response time,Next State:  (9, 19.02839494033929) 1 22.0 1419.16011 (19.286321916040979, 8)
loss 2851.2333984375
Current State,action,reward,Response time,Next State:  (8, 19.286321916040979) 0 24.0 1498.22861069 (19.340464848017284, 6)
loss 4334.626953125
Current State,action,reward,Response time,Next State:  (6, 19.340464848017284) 0 -10.7804098492 1676.55992467 (19.213467265587269, 4)
loss 1574.677490234375
Current State,action,reward,Response time,Next State:  (4, 19.213467265587269) 3 23.373912818 2367.80409849 (19.140765783401285, 5)
loss 751.9508056640625
Current State,action,reward,Response time,Next State:  (5, 19.140765783401285) 0 -144.302020223 2016.26087182 (19.385636054792762, 3)
loss 2112.6328125
Current State,action,reward,Response time,Next State:  (3, 19.385636054792762) 3 -10.8997199267 3713.02020223 (19.223969507401588, 4)
loss 1343.60302734375
Current State,action,reward,Response time,Next State:  (4, 19.223969507401588) 2 -11.2626066047 2368.99719927 (19.25591252280865, 4)
loss 1814.4837646484375
Current State,action,reward,Response time,Next State:  (4, 19.25591252280865) 4 24.0 2372.62606605 (19.08360399753829, 6)
loss 1217.296875
Current State,action,reward,Response time,Next State:  (6, 19.08360399753829) 3 23.0 1659.57137766 (18.668181536495972, 7)
loss 1806.1671142578125
Current State,action,reward,Response time,Next State:  (7, 18.668181536495972) 4 21.0 1539.16919707 (18.375894992990247, 9)
loss 918.04931640625
Current State,action,reward,Response time,Next State:  (9, 18.375894992990247) 4 19.0 1385.00495784 (17.82724819986867, 11)
loss 2553.483154296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 1574.86474609375
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 1 20.0 1278.56065924 (16.84211602880065, 10)
loss 2767.77392578125
Current State,action,reward,Response time,Next State:  (10, 16.84211602880065) 0 22.0 1271.03516211 (16.237094554670044, 8)
loss 1813.779296875
Current State,action,reward,Response time,Next State:  (8, 16.237094554670044) 3 21.0 1320.01714264 (15.950694610794756, 9)
loss 1780.751220703125
Current State,action,reward,Response time,Next State:  (9, 15.950694610794756) 4 19.0 1258.0576862 (15.828704162850809, 11)
loss 1793.1407470703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 675.8311767578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 477.2970275878906
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 2 19.0 1184.33851965 (15.750501603468638, 11)
loss 1064.4124755859375
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 1 20.0 1200.39231205 (15.817158911312735, 10)
loss 1809.3583984375
Current State,action,reward,Response time,Next State:  (10, 15.817158911312735) 3 19.0 1216.66724247 (15.829956988360925, 11)
loss 602.665771484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 786.857421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 1530.2264404296875
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 0 21.0 1211.18755114 (16.004586266677634, 9)
loss 880.5181274414062
Current State,action,reward,Response time,Next State:  (9, 16.004586266677634) 1 22.0 1260.87864843 (16.017694914042416, 8)
loss 405.31719970703125
Current State,action,reward,Response time,Next State:  (8, 16.017694914042416) 1 23.0 1307.19437562 (15.947547279389703, 7)
loss 1789.370849609375
Current State,action,reward,Response time,Next State:  (7, 15.947547279389703) 3 22.0 1369.62816392 (16.11465619633363, 8)
loss 483.9270935058594
Current State,action,reward,Response time,Next State:  (8, 16.11465619633363) 3 21.0 1312.86125789 (16.147078378791146, 9)
loss 1200.8662109375
Current State,action,reward,Response time,Next State:  (9, 16.147078378791146) 0 23.0 1268.3374073 (16.229253414601111, 7)
loss 1542.274169921875
Current State,action,reward,Response time,Next State:  (7, 16.229253414601111) 3 22.0 1387.18316937 (16.295120821876548, 8)
loss 718.5923461914062
Current State,action,reward,Response time,Next State:  (8, 16.295120821876548) 3 21.0 1323.40847593 (16.667936385136993, 9)
loss 686.162841796875
Current State,action,reward,Response time,Next State:  (9, 16.667936385136993) 2 21.0 1295.6017535 (16.836383524612351, 9)
loss 4092.921142578125
Current State,action,reward,Response time,Next State:  (9, 16.836383524612351) 2 21.0 1304.41912996 (16.845818065953559, 9)
loss 1725.544921875
Current State,action,reward,Response time,Next State:  (9, 16.845818065953559) 0 23.0 1304.91298164 (17.052961248403161, 7)
loss 1821.0633544921875
Current State,action,reward,Response time,Next State:  (7, 17.052961248403161) 2 23.0 1438.51394887 (17.215992726625572, 7)
loss 1266.7724609375
############ Running episode number: 37  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 1740.1412353515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 743.778076171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 1775.472900390625
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 1 20.0 982.049698353 (11.469111876584304, 10)
loss 696.5272216796875
Action +2 not possible so Scaled up by 1
Current State,action,reward,Response time,Next State:  (10, 11.469111876584304) 4 19.0 986.02903554 (11.336751742492702, 11)
loss 2161.22412109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 1429.06494140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 1930.0142822265625
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 0 21.0 950.798097136 (10.995673623987257, 9)
loss 753.1309814453125
Current State,action,reward,Response time,Next State:  (9, 10.995673623987257) 3 20.0 998.686790566 (10.931193889570471, 10)
loss 1198.1861572265625
Current State,action,reward,Response time,Next State:  (10, 10.931193889570471) 3 19.0 957.495664348 (10.816918347608043, 11)
loss 1542.5638427734375
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 2 19.0 939.691239608 (10.819208572963639, 11)
loss 2022.132080078125
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 4 19.0 939.812260006 (10.768325938188134, 11)
loss 1584.3895263671875
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 0 21.0 937.12351295 (10.772009508959538, 9)
loss 1743.4471435546875
Current State,action,reward,Response time,Next State:  (9, 10.772009508959538) 2 21.0 986.979077927 (10.644925616761762, 9)
loss 2017.85498046875
Current State,action,reward,Response time,Next State:  (9, 10.644925616761762) 3 20.0 980.32686333 (10.58735855349979, 10)
loss 1565.3245849609375
Current State,action,reward,Response time,Next State:  (10, 10.58735855349979) 1 21.0 939.257231149 (10.552868829802469, 9)
loss 2774.072265625
Current State,action,reward,Response time,Next State:  (9, 10.552868829802469) 3 20.0 975.508144832 (10.553846649940214, 10)
loss 920.5576782226562
Current State,action,reward,Response time,Next State:  (10, 10.553846649940214) 0 22.0 937.479622653 (10.489125480251131, 8)
loss 1941.707763671875
Current State,action,reward,Response time,Next State:  (8, 10.489125480251131) 0 24.0 984.078268423 (10.448897752470936, 6)
loss 1496.9227294921875
Current State,action,reward,Response time,Next State:  (6, 10.448897752470936) 0 26.0 1088.47962603 (10.433149880183072, 4)
loss 871.2244873046875
Current State,action,reward,Response time,Next State:  (4, 10.433149880183072) 2 26.0 1370.32149792 (10.44185150623065, 4)
loss 2080.715576171875
Current State,action,reward,Response time,Next State:  (4, 10.44185150623065) 3 25.0 1371.31004084 (10.370942817486826, 5)
loss 963.1790771484375
Current State,action,reward,Response time,Next State:  (5, 10.370942817486826) 3 24.0 1210.47005993 (10.42733414151318, 6)
loss 1960.6541748046875
Current State,action,reward,Response time,Next State:  (6, 10.42733414151318) 4 22.0 1087.05342808 (10.388469398680568, 8)
loss 675.4423828125
Current State,action,reward,Response time,Next State:  (8, 10.388469398680568) 0 24.0 978.19544437 (10.344006106602812, 6)
loss 5595.8583984375
Current State,action,reward,Response time,Next State:  (6, 10.344006106602812) 3 23.0 1081.54218635 (10.319026962956018, 7)
loss 3102.668701171875
Current State,action,reward,Response time,Next State:  (7, 10.319026962956018) 3 22.0 1018.87717743 (10.30224719189987, 8)
loss 1761.6236572265625
Current State,action,reward,Response time,Next State:  (8, 10.30224719189987) 3 21.0 973.15620517 (10.278181486298042, 9)
loss 707.9264526367188
Current State,action,reward,Response time,Next State:  (9, 10.278181486298042) 3 20.0 961.129617982 (10.268274366284802, 10)
loss 677.122314453125
Current State,action,reward,Response time,Next State:  (10, 10.268274366284802) 3 19.0 922.331700166 (10.335411397720526, 11)
loss 1668.199462890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 1817.3751220703125
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 2 19.0 912.67468196 (10.24826025489064, 11)
loss 1533.3111572265625
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 1 20.0 909.642131904 (10.276491935146446, 10)
loss 2375.82861328125
Current State,action,reward,Response time,Next State:  (10, 10.276491935146446) 0 22.0 922.767593645 (10.236991269871366, 8)
loss 680.5870361328125
Current State,action,reward,Response time,Next State:  (8, 10.236991269871366) 2 22.0 969.342336184 (10.236272697871373, 8)
loss 693.8516845703125
Current State,action,reward,Response time,Next State:  (8, 10.236272697871373) 2 22.0 969.300339391 (10.369891240151098, 8)
loss 816.1235961914062
Current State,action,reward,Response time,Next State:  (8, 10.369891240151098) 2 22.0 977.109647703 (10.316955310454549, 8)
loss 2460.856201171875
Current State,action,reward,Response time,Next State:  (8, 10.316955310454549) 2 22.0 974.015818144 (10.333617326102203, 8)
loss 423.35797119140625
Current State,action,reward,Response time,Next State:  (8, 10.333617326102203) 3 21.0 974.989626232 (10.390165524255663, 9)
loss 2643.029052734375
Current State,action,reward,Response time,Next State:  (9, 10.390165524255663) 3 20.0 966.991429728 (10.425974763084863, 10)
loss 2939.578857421875
Current State,action,reward,Response time,Next State:  (10, 10.425974763084863) 3 19.0 930.696774523 (10.546025383098053, 11)
loss 3863.486572265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 2826.07421875
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 1 20.0 931.154858096 (10.624473674922116, 10)
loss 954.5441284179688
Current State,action,reward,Response time,Next State:  (10, 10.624473674922116) 2 20.0 941.225969064 (10.771376986314287, 10)
loss 1214.7857666015625
Current State,action,reward,Response time,Next State:  (10, 10.771376986314287) 2 20.0 949.018321829 (10.924797168745895, 10)
loss 1088.925048828125
Current State,action,reward,Response time,Next State:  (10, 10.924797168745895) 3 19.0 957.1563561 (11.039747673816453, 11)
loss 857.541015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 1765.134521484375
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 4 19.0 963.716106332 (11.670334358779868, 11)
loss 3869.6357421875
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 1 20.0 984.787563682 (11.819721938468785, 10)
loss 658.1234741210938
Current State,action,reward,Response time,Next State:  (10, 11.819721938468785) 2 20.0 1004.62682792 (12.19918626616789, 10)
loss 3703.291015625
Current State,action,reward,Response time,Next State:  (10, 12.19918626616789) 3 19.0 1024.75516863 (12.501496275411796, 11)
loss 859.6988525390625
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 4 19.0 1028.70793389 (13.168618569876575, 11)
loss 1901.578369140625
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 4 19.0 1063.96010023 (13.649658108197247, 11)
loss 669.020751953125
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 2 19.0 1089.37925646 (14.283719188889453, 11)
loss 1464.5372314453125
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 0 21.0 1122.88439768 (14.677479537099185, 9)
loss 1255.173583984375
Current State,action,reward,Response time,Next State:  (9, 14.677479537099185) 4 19.0 1191.41116041 (15.353965082180355, 11)
loss 635.3767700195312
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 1711.3028564453125
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 2 19.0 1204.9600972 (16.466876895473597, 11)
loss 2832.7333984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 708.9553833007812
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 3587.281982421875
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 1 20.0 1294.6873044 (17.669285735563751, 10)
loss 1972.201416015625
Current State,action,reward,Response time,Next State:  (10, 17.669285735563751) 0 22.0 1314.91162813 (17.944480812078613, 8)
loss 507.6570129394531
Current State,action,reward,Response time,Next State:  (8, 17.944480812078613) 0 24.0 1419.80498244 (18.385807405229915, 6)
loss 893.975830078125
Current State,action,reward,Response time,Next State:  (6, 18.385807405229915) 4 22.0 1613.41973487 (18.671267839956315, 8)
loss 843.8071899414062
Current State,action,reward,Response time,Next State:  (8, 18.671267839956315) 1 23.0 1462.2819013 (19.02839494033929, 7)
loss 1617.2330322265625
Current State,action,reward,Response time,Next State:  (7, 19.02839494033929) 0 22.0365107433 1561.61651886 (19.286321916040979, 5)
loss 1924.1795654296875
Current State,action,reward,Response time,Next State:  (5, 19.286321916040979) 3 24.0 2029.63489257 (19.340464848017284, 6)
loss 1211.593994140625
Current State,action,reward,Response time,Next State:  (6, 19.340464848017284) 4 22.0 1676.55992467 (19.213467265587269, 8)
loss 1671.7734375
Current State,action,reward,Response time,Next State:  (8, 19.213467265587269) 2 22.0 1493.97063558 (19.140765783401285, 8)
loss 961.77197265625
Current State,action,reward,Response time,Next State:  (8, 19.140765783401285) 1 23.0 1489.72161235 (19.385636054792762, 7)
loss 2158.954345703125
Current State,action,reward,Response time,Next State:  (7, 19.385636054792762) 1 24.0 1583.87861729 (19.223969507401588, 6)
loss 1407.36962890625
Current State,action,reward,Response time,Next State:  (6, 19.223969507401588) 0 -11.2626066047 1668.85502699 (19.25591252280865, 4)
loss 3330.8603515625
Current State,action,reward,Response time,Next State:  (4, 19.25591252280865) 1 -138.361918603 2372.62606605 (19.08360399753829, 3)
loss 2766.3798828125
Current State,action,reward,Response time,Next State:  (3, 19.08360399753829) 2 -130.191753965 3653.61918603 (18.668181536495972, 3)
loss 1945.3211669921875
Current State,action,reward,Response time,Next State:  (3, 18.668181536495972) 1 -309.577378636 3571.91753965 (18.375894992990247, 2)
loss 1024.94189453125
Action -2 not possible so Scaled down by 0
Current State,action,reward,Response time,Next State:  (2, 18.375894992990247) 0 -292.515395521 5375.77378636 (17.82724819986867, 2)
loss 781.8287963867188
Current State,action,reward,Response time,Next State:  (2, 17.82724819986867) 3 -101.90257811 5205.15395521 (17.229782241685768, 3)
loss 3105.3671875
Current State,action,reward,Response time,Next State:  (3, 17.229782241685768) 3 16.1591812239 3289.0257811 (16.84211602880065, 4)
loss 1777.9298095703125
Current State,action,reward,Response time,Next State:  (4, 16.84211602880065) 4 24.0 2098.40818776 (16.237094554670044, 6)
loss 3988.273193359375
Current State,action,reward,Response time,Next State:  (6, 16.237094554670044) 3 23.0 1471.30578788 (15.950694610794756, 7)
loss 3745.075439453125
Current State,action,reward,Response time,Next State:  (7, 15.950694610794756) 2 23.0 1369.82429532 (15.828704162850809, 7)
loss 472.17987060546875
Current State,action,reward,Response time,Next State:  (7, 15.828704162850809) 4 21.0 1362.22224939 (15.550833128512703, 9)
loss 407.9368591308594
Current State,action,reward,Response time,Next State:  (9, 15.550833128512703) 3 20.0 1237.12691092 (15.446694946204717, 10)
loss 1144.2808837890625
Current State,action,reward,Response time,Next State:  (10, 15.446694946204717) 3 19.0 1197.01631782 (15.750501603468638, 11)
loss 1565.376220703125
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 0 21.0 1200.39231205 (15.817158911312735, 9)
loss 606.7569580078125
Current State,action,reward,Response time,Next State:  (9, 15.817158911312735) 3 20.0 1251.06775133 (15.829956988360925, 10)
loss 3008.768798828125
Current State,action,reward,Response time,Next State:  (10, 15.829956988360925) 2 20.0 1217.34610485 (15.892373986997768, 10)
loss 2008.1573486328125
Current State,action,reward,Response time,Next State:  (10, 15.892373986997768) 0 22.0 1220.65695786 (15.954793861767499, 8)
loss 2086.880859375
Current State,action,reward,Response time,Next State:  (8, 15.954793861767499) 3 21.0 1303.51813652 (16.004586266677634, 9)
loss 1970.5941162109375
Current State,action,reward,Response time,Next State:  (9, 16.004586266677634) 4 19.0 1260.87864843 (16.017694914042416, 11)
loss 2141.642822265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 1442.3990478515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 2740.96240234375
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 2 19.0 1219.63501821 (16.147078378791146, 11)
loss 2159.86572265625
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 1 20.0 1221.34827555 (16.229253414601111, 10)
loss 1533.9232177734375
Current State,action,reward,Response time,Next State:  (10, 16.229253414601111) 3 19.0 1238.52642122 (16.295120821876548, 11)
loss 1970.0872802734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 996.2862548828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 1714.468017578125
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 0 21.0 1257.77263112 (16.845818065953559, 9)
loss 717.762939453125
Current State,action,reward,Response time,Next State:  (9, 16.845818065953559) 3 20.0 1304.91298164 (17.052961248403161, 10)
loss 1626.1951904296875
Current State,action,reward,Response time,Next State:  (10, 17.052961248403161) 3 19.0 1282.21925533 (17.215992726625572, 11)
loss 2067.67724609375
############ Running episode number: 38  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 0 22.0 1029.06659875 (11.973514343585284, 8)
loss 2801.340087890625
Current State,action,reward,Response time,Next State:  (8, 11.973514343585284) 0 24.0 1070.83307124 (11.786394321941378, 6)
loss 1906.3682861328125
Current State,action,reward,Response time,Next State:  (6, 11.786394321941378) 0 26.0 1176.94045342 (11.61852219546234, 4)
loss 2337.11279296875
Current State,action,reward,Response time,Next State:  (4, 11.61852219546234) 4 24.0 1504.98499672 (11.469111876584304, 6)
loss 1758.98046875
Current State,action,reward,Response time,Next State:  (6, 11.469111876584304) 3 23.0 1155.95567613 (11.336751742492702, 7)
loss 1970.7960205078125
Current State,action,reward,Response time,Next State:  (7, 11.336751742492702) 2 23.0 1082.29845875 (11.25610796929319, 7)
loss 1901.87158203125
Current State,action,reward,Response time,Next State:  (7, 11.25610796929319) 3 22.0 1077.27300243 (11.027107764209074, 8)
loss 474.8307800292969
Current State,action,reward,Response time,Next State:  (8, 11.027107764209074) 3 21.0 1015.52053272 (10.995673623987257, 9)
loss 432.58837890625
Current State,action,reward,Response time,Next State:  (9, 10.995673623987257) 3 20.0 998.686790566 (10.931193889570471, 10)
loss 1738.8851318359375
Current State,action,reward,Response time,Next State:  (10, 10.931193889570471) 1 21.0 957.495664348 (10.816918347608043, 9)
loss 3283.18505859375
Current State,action,reward,Response time,Next State:  (9, 10.816918347608043) 3 20.0 989.329834005 (10.819208572963639, 10)
loss 527.695556640625
Current State,action,reward,Response time,Next State:  (10, 10.819208572963639) 1 21.0 951.555504911 (10.768325938188134, 9)
loss 1375.6275634765625
Current State,action,reward,Response time,Next State:  (9, 10.768325938188134) 3 20.0 986.786261176 (10.772009508959538, 10)
loss 4353.46923828125
Current State,action,reward,Response time,Next State:  (10, 10.772009508959538) 3 19.0 949.051873418 (10.644925616761762, 11)
loss 665.2156372070312
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 3975.90625
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 2 19.0 927.560809977 (10.552868829802469, 11)
loss 892.3816528320312
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 2 19.0 925.738299342 (10.553846649940214, 11)
loss 1680.9578857421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 1348.89404296875
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 0 21.0 922.369964659 (10.448897752470936, 9)
loss 697.1365356445312
Current State,action,reward,Response time,Next State:  (9, 10.448897752470936) 3 20.0 970.065772031 (10.433149880183072, 10)
loss 3056.034912109375
Current State,action,reward,Response time,Next State:  (10, 10.433149880183072) 3 19.0 931.077372094 (10.44185150623065, 11)
loss 3165.360107421875
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 0 21.0 919.871906942 (10.370942817486826, 9)
loss 1494.8309326171875
Current State,action,reward,Response time,Next State:  (9, 10.370942817486826) 2 21.0 965.985215893 (10.42733414151318, 9)
loss 916.7086181640625
Current State,action,reward,Response time,Next State:  (9, 10.42733414151318) 3 20.0 968.937023414 (10.388469398680568, 10)
loss 1090.0294189453125
Current State,action,reward,Response time,Next State:  (10, 10.388469398680568) 2 20.0 928.707336523 (10.344006106602812, 10)
loss 2700.608154296875
Current State,action,reward,Response time,Next State:  (10, 10.344006106602812) 1 21.0 926.348821567 (10.319026962956018, 9)
loss 795.1069946289062
Current State,action,reward,Response time,Next State:  (9, 10.319026962956018) 0 23.0 963.267677113 (10.30224719189987, 7)
loss 3490.16796875
Current State,action,reward,Response time,Next State:  (7, 10.30224719189987) 1 24.0 1017.83151695 (10.278181486298042, 6)
loss 1856.31494140625
Current State,action,reward,Response time,Next State:  (6, 10.278181486298042) 2 24.0 1077.18860481 (10.268274366284802, 6)
loss 2081.14990234375
Current State,action,reward,Response time,Next State:  (6, 10.268274366284802) 3 23.0 1076.53335675 (10.335411397720526, 7)
loss 3812.436767578125
Current State,action,reward,Response time,Next State:  (7, 10.335411397720526) 3 22.0 1019.89820185 (10.305649118067803, 8)
loss 1552.4957275390625
Current State,action,reward,Response time,Next State:  (8, 10.305649118067803) 3 21.0 973.355030047 (10.24826025489064, 9)
loss 3510.69384765625
Current State,action,reward,Response time,Next State:  (9, 10.24826025489064) 0 23.0 959.563389179 (10.276491935146446, 7)
loss 1056.4151611328125
Current State,action,reward,Response time,Next State:  (7, 10.276491935146446) 3 22.0 1016.22653355 (10.236991269871366, 8)
loss 418.525390625
Current State,action,reward,Response time,Next State:  (8, 10.236991269871366) 3 21.0 969.342336184 (10.236272697871373, 9)
loss 1064.6484375
Current State,action,reward,Response time,Next State:  (9, 10.236272697871373) 3 20.0 958.935899728 (10.369891240151098, 10)
loss 3395.751220703125
Current State,action,reward,Response time,Next State:  (10, 10.369891240151098) 2 20.0 927.721874973 (10.316955310454549, 10)
loss 611.9932250976562
Current State,action,reward,Response time,Next State:  (10, 10.316955310454549) 3 19.0 924.913936648 (10.333617326102203, 11)
loss 1343.37548828125
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 0 21.0 914.152581784 (10.390165524255663, 9)
loss 501.3003234863281
Current State,action,reward,Response time,Next State:  (9, 10.390165524255663) 3 20.0 966.991429728 (10.425974763084863, 10)
loss 1485.1072998046875
Current State,action,reward,Response time,Next State:  (10, 10.425974763084863) 2 20.0 930.696774523 (10.546025383098053, 10)
loss 3185.171630859375
Current State,action,reward,Response time,Next State:  (10, 10.546025383098053) 3 19.0 937.064750655 (10.655373370049301, 11)
loss 4173.54833984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 491.6971435546875
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 0 21.0 929.522052234 (10.771376986314287, 9)
loss 614.5330200195312
Current State,action,reward,Response time,Next State:  (9, 10.771376986314287) 3 20.0 986.945968488 (10.924797168745895, 10)
loss 1220.51953125
Current State,action,reward,Response time,Next State:  (10, 10.924797168745895) 3 19.0 957.1563561 (11.039747673816453, 11)
loss 870.5173950195312
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 2567.453857421875
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 0 21.0 963.716106332 (11.670334358779868, 9)
loss 1273.538330078125
Current State,action,reward,Response time,Next State:  (9, 11.670334358779868) 3 20.0 1034.00195058 (11.819721938468785, 10)
loss 915.749755859375
Current State,action,reward,Response time,Next State:  (10, 11.819721938468785) 3 19.0 1004.62682792 (12.19918626616789, 11)
loss 2025.165771484375
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 4 19.0 1012.73322757 (12.501496275411796, 11)
loss 786.891357421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 2035.4102783203125
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 4 19.0 1063.96010023 (13.649658108197247, 11)
loss 598.7340698242188
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 1 20.0 1089.37925646 (14.283719188889453, 10)
loss 1713.361572265625
Current State,action,reward,Response time,Next State:  (10, 14.283719188889453) 2 20.0 1135.32732476 (14.677479537099185, 10)
loss 2707.674560546875
Current State,action,reward,Response time,Next State:  (10, 14.677479537099185) 0 22.0 1156.21398489 (15.353965082180355, 8)
loss 753.6964721679688
Current State,action,reward,Response time,Next State:  (8, 15.353965082180355) 3 21.0 1268.40282167 (15.836943704090487, 9)
loss 1332.010009765625
Current State,action,reward,Response time,Next State:  (9, 15.836943704090487) 3 20.0 1252.10338759 (16.466876895473597, 10)
loss 1600.8165283203125
Current State,action,reward,Response time,Next State:  (10, 16.466876895473597) 0 22.0 1251.130943 (16.871606159345866, 8)
loss 2656.792236328125
Current State,action,reward,Response time,Next State:  (8, 16.871606159345866) 4 20.0 1357.1010433 (17.534967586021782, 10)
loss 616.1638793945312
Current State,action,reward,Response time,Next State:  (10, 17.534967586021782) 3 19.0 1307.78684385 (17.669285735563751, 11)
loss 1505.1669921875
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 2 19.0 1301.78496219 (17.944480812078613, 11)
loss 1868.6580810546875
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 1 20.0 1316.32685758 (18.385807405229915, 10)
loss 3243.5126953125
Current State,action,reward,Response time,Next State:  (10, 18.385807405229915) 1 21.0 1352.9188695 (18.671267839956315, 9)
loss 749.8692016601562
Current State,action,reward,Response time,Next State:  (9, 18.671267839956315) 3 20.0 1400.46626871 (19.02839494033929, 10)
loss 1713.0364990234375
Action +2 not possible so Scaled up by 1
Current State,action,reward,Response time,Next State:  (10, 19.02839494033929) 4 19.0 1387.00434183 (19.286321916040979, 11)
loss 1946.6278076171875
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 1 20.0 1387.23260634 (19.340464848017284, 10)
loss 2734.694580078125
Current State,action,reward,Response time,Next State:  (10, 19.340464848017284) 3 19.0 1403.55780672 (19.213467265587269, 11)
loss 2620.80517578125
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 0 21.0 1383.38281107 (19.140765783401285, 9)
loss 2108.57373046875
Current State,action,reward,Response time,Next State:  (9, 19.140765783401285) 4 19.0 1425.04216908 (19.385636054792762, 11)
loss 975.1915283203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 424.27716064453125
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 1 20.0 1383.93777195 (19.25591252280865, 10)
loss 1746.1199951171875
Current State,action,reward,Response time,Next State:  (10, 19.25591252280865) 3 19.0 1399.0728054 (19.08360399753829, 11)
loss 646.7476196289062
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 2639.17919921875
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 1 20.0 1354.56874896 (18.375894992990247, 10)
loss 2699.427490234375
Current State,action,reward,Response time,Next State:  (10, 18.375894992990247) 1 21.0 1352.39307459 (17.82724819986867, 9)
loss 1308.5498046875
Current State,action,reward,Response time,Next State:  (9, 17.82724819986867) 2 21.0 1356.28600579 (17.229782241685768, 9)
loss 687.9959716796875
Current State,action,reward,Response time,Next State:  (9, 17.229782241685768) 3 20.0 1325.01161138 (16.84211602880065, 10)
loss 646.6408081054688
Current State,action,reward,Response time,Next State:  (10, 16.84211602880065) 3 19.0 1271.03516211 (16.237094554670044, 11)
loss 1036.840087890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 1743.31982421875
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 0 21.0 1210.97093797 (15.828704162850809, 9)
loss 701.222900390625
Current State,action,reward,Response time,Next State:  (9, 15.828704162850809) 1 22.0 1251.67208827 (15.550833128512703, 8)
loss 1671.8880615234375
Current State,action,reward,Response time,Next State:  (8, 15.550833128512703) 4 20.0 1279.90873428 (15.446694946204717, 10)
loss 905.7008056640625
Current State,action,reward,Response time,Next State:  (10, 15.446694946204717) 3 19.0 1197.01631782 (15.750501603468638, 11)
loss 671.652099609375
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 1 20.0 1200.39231205 (15.817158911312735, 10)
loss 1670.247802734375
Current State,action,reward,Response time,Next State:  (10, 15.817158911312735) 0 22.0 1216.66724247 (15.829956988360925, 8)
loss 620.451904296875
Current State,action,reward,Response time,Next State:  (8, 15.829956988360925) 3 21.0 1296.22207104 (15.892373986997768, 9)
loss 1475.462890625
Current State,action,reward,Response time,Next State:  (9, 15.892373986997768) 0 23.0 1255.00488935 (15.954793861767499, 7)
loss 690.876708984375
Current State,action,reward,Response time,Next State:  (7, 15.954793861767499) 3 22.0 1370.07974724 (16.004586266677634, 8)
loss 1529.710205078125
Current State,action,reward,Response time,Next State:  (8, 16.004586266677634) 3 21.0 1306.42824342 (16.017694914042416, 9)
loss 615.5480346679688
Current State,action,reward,Response time,Next State:  (9, 16.017694914042416) 0 23.0 1261.56482143 (15.947547279389703, 7)
loss 1286.551513671875
Current State,action,reward,Response time,Next State:  (7, 15.947547279389703) 2 23.0 1369.62816392 (16.11465619633363, 7)
loss 1754.32568359375
Current State,action,reward,Response time,Next State:  (7, 16.11465619633363) 0 25.0 1380.04184534 (16.147078378791146, 5)
loss 458.9909362792969
Current State,action,reward,Response time,Next State:  (5, 16.147078378791146) 3 24.0 1741.1942241 (16.229253414601111, 6)
loss 2082.184326171875
Current State,action,reward,Response time,Next State:  (6, 16.229253414601111) 2 24.0 1470.78718189 (16.295120821876548, 6)
loss 1145.00244140625
Current State,action,reward,Response time,Next State:  (6, 16.295120821876548) 3 23.0 1475.14359332 (16.667936385136993, 7)
loss 680.6157836914062
Current State,action,reward,Response time,Next State:  (7, 16.667936385136993) 4 21.0 1414.52045804 (16.836383524612351, 9)
loss 2084.154052734375
Current State,action,reward,Response time,Next State:  (9, 16.836383524612351) 3 20.0 1304.41912996 (16.845818065953559, 10)
loss 417.948974609375
Current State,action,reward,Response time,Next State:  (10, 16.845818065953559) 3 19.0 1271.23153331 (17.052961248403161, 11)
loss 2049.95703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 1400.432861328125
############ Running episode number: 39  ##############
Action +2 not possible so Scaled up by 1
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 4 19.0 1029.06659875 (11.973514343585284, 11)
loss 842.8883056640625
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 1 20.0 1000.80824137 (11.786394321941378, 10)
loss 884.6251220703125
Current State,action,reward,Response time,Next State:  (10, 11.786394321941378) 3 19.0 1002.85899476 (11.61852219546234, 11)
loss 415.5427551269531
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 1 20.0 982.049698353 (11.469111876584304, 10)
loss 1475.4664306640625
Current State,action,reward,Response time,Next State:  (10, 11.469111876584304) 0 22.0 986.02903554 (11.336751742492702, 8)
loss 799.0590209960938
Current State,action,reward,Response time,Next State:  (8, 11.336751742492702) 3 21.0 1033.61761156 (11.25610796929319, 9)
loss 859.635498046875
Current State,action,reward,Response time,Next State:  (9, 11.25610796929319) 1 22.0 1012.3192433 (11.027107764209074, 8)
loss 611.2957763671875
Current State,action,reward,Response time,Next State:  (8, 11.027107764209074) 3 21.0 1015.52053272 (10.995673623987257, 9)
loss 1037.3896484375
Current State,action,reward,Response time,Next State:  (9, 10.995673623987257) 3 20.0 998.686790566 (10.931193889570471, 10)
loss 1748.334228515625
Current State,action,reward,Response time,Next State:  (10, 10.931193889570471) 3 19.0 957.495664348 (10.816918347608043, 11)
loss 1953.4324951171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 839.094970703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 404.7496337890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 419.0995788574219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 1544.8822021484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 2318.250732421875
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 2 19.0 927.560809977 (10.552868829802469, 11)
loss 1767.9176025390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 896.10693359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 777.1620483398438
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 1 20.0 922.369964659 (10.448897752470936, 10)
loss 1525.5897216796875
Current State,action,reward,Response time,Next State:  (10, 10.448897752470936) 2 20.0 931.912703681 (10.433149880183072, 10)
loss 1537.28271484375
Current State,action,reward,Response time,Next State:  (10, 10.433149880183072) 3 19.0 931.077372094 (10.44185150623065, 11)
loss 2040.5797119140625
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 0 21.0 919.871906942 (10.370942817486826, 9)
loss 720.721923828125
Current State,action,reward,Response time,Next State:  (9, 10.370942817486826) 3 20.0 965.985215893 (10.42733414151318, 10)
loss 682.6696166992188
Current State,action,reward,Response time,Next State:  (10, 10.42733414151318) 2 20.0 930.768881517 (10.388469398680568, 10)
loss 1540.1336669921875
Current State,action,reward,Response time,Next State:  (10, 10.388469398680568) 3 19.0 928.707336523 (10.344006106602812, 11)
loss 934.2259521484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 2483.50146484375
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 2 19.0 913.381595845 (10.30224719189987, 11)
loss 2563.703369140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 1617.5167236328125
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 1 20.0 911.223233653 (10.268274366284802, 10)
loss 808.4314575195312
Current State,action,reward,Response time,Next State:  (10, 10.268274366284802) 3 19.0 922.331700166 (10.335411397720526, 11)
loss 1005.70263671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 1551.2025146484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 1356.6920166015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 417.50067138671875
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 1 20.0 911.133954163 (10.236991269871366, 10)
loss 602.4592895507812
Current State,action,reward,Response time,Next State:  (10, 10.236991269871366) 3 19.0 920.672316722 (10.236272697871373, 11)
loss 1536.205322265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 602.6961669921875
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 4 19.0 916.069372847 (10.316955310454549, 11)
loss 740.7461547851562
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 2 19.0 913.272125304 (10.333617326102203, 11)
loss 2734.1435546875
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 0 21.0 914.152581784 (10.390165524255663, 9)
loss 721.242919921875
Current State,action,reward,Response time,Next State:  (9, 10.390165524255663) 3 20.0 966.991429728 (10.425974763084863, 10)
loss 716.8748168945312
Current State,action,reward,Response time,Next State:  (10, 10.425974763084863) 3 19.0 930.696774523 (10.546025383098053, 11)
loss 3931.833984375
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 1 20.0 925.376677007 (10.655373370049301, 10)
loss 1849.5032958984375
Current State,action,reward,Response time,Next State:  (10, 10.655373370049301) 3 19.0 942.865015335 (10.624473674922116, 11)
loss 1414.8851318359375
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 4 19.0 929.522052234 (10.771376986314287, 11)
loss 2363.535888671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 618.4573974609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 1728.8148193359375
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 4 19.0 951.466016946 (11.271571944085663, 11)
loss 415.29656982421875
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 2 19.0 963.716106332 (11.670334358779868, 11)
loss 717.3499145507812
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 2 19.0 984.787563682 (11.819721938468785, 11)
loss 607.3121948242188
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 2 19.0 992.681522335 (12.19918626616789, 11)
loss 2613.10107421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 703.4571533203125
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 0 21.0 1028.70793389 (13.168618569876575, 9)
loss 1635.3033447265625
Current State,action,reward,Response time,Next State:  (9, 13.168618569876575) 2 21.0 1112.429735 (13.649658108197247, 9)
loss 568.7092895507812
Current State,action,reward,Response time,Next State:  (9, 13.649658108197247) 3 20.0 1137.6097809 (14.283719188889453, 10)
loss 2572.740478515625
Current State,action,reward,Response time,Next State:  (10, 14.283719188889453) 3 19.0 1135.32732476 (14.677479537099185, 11)
loss 1013.6001586914062
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 0 21.0 1143.69153516 (15.353965082180355, 9)
loss 970.6973876953125
Current State,action,reward,Response time,Next State:  (9, 15.353965082180355) 1 22.0 1226.82184023 (15.836943704090487, 8)
loss 1445.65185546875
Current State,action,reward,Response time,Next State:  (8, 15.836943704090487) 3 21.0 1296.63040821 (16.466876895473597, 9)
loss 887.9935302734375
Current State,action,reward,Response time,Next State:  (9, 16.466876895473597) 3 20.0 1285.07728144 (16.871606159345866, 10)
loss 420.1741027832031
Current State,action,reward,Response time,Next State:  (10, 16.871606159345866) 3 19.0 1272.5994393 (17.534967586021782, 11)
loss 409.5423583984375
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 2 19.0 1294.6873044 (17.669285735563751, 11)
loss 740.5269165039062
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 2138.094970703125
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 0 21.0 1316.32685758 (18.385807405229915, 9)
loss 937.5343017578125
Current State,action,reward,Response time,Next State:  (9, 18.385807405229915) 4 19.0 1385.5238237 (18.671267839956315, 11)
loss 542.27587890625
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 4 19.0 1354.73183582 (19.02839494033929, 11)
loss 520.1204223632812
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 1 20.0 1373.60319427 (19.286321916040979, 10)
loss 1571.6939697265625
Current State,action,reward,Response time,Next State:  (10, 19.286321916040979) 3 19.0 1400.68584406 (19.340464848017284, 11)
loss 2763.53857421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 1475.0853271484375
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 0 21.0 1383.38281107 (19.140765783401285, 9)
loss 811.3060302734375
Current State,action,reward,Response time,Next State:  (9, 19.140765783401285) 3 20.0 1425.04216908 (19.385636054792762, 10)
loss 1753.785400390625
Current State,action,reward,Response time,Next State:  (10, 19.385636054792762) 3 19.0 1405.95387237 (19.223969507401588, 11)
loss 421.58984375
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 0 21.0 1383.93777195 (19.25591252280865, 9)
loss 613.6275024414062
Current State,action,reward,Response time,Next State:  (9, 19.25591252280865) 3 20.0 1431.06953264 (19.08360399753829, 10)
loss 2271.632080078125
Action +2 not possible so Scaled up by 1
Current State,action,reward,Response time,Next State:  (10, 19.08360399753829) 4 19.0 1389.93285614 (18.668181536495972, 11)
loss 1603.8131103515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 1207.8355712890625
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 1 20.0 1339.12370397 (17.82724819986867, 10)
loss 767.9407348632812
Current State,action,reward,Response time,Next State:  (10, 17.82724819986867) 3 19.0 1323.29060362 (17.229782241685768, 11)
loss 858.1322021484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 922.6941528320312
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 2146.828857421875
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 0 21.0 1226.10492247 (15.950694610794756, 9)
loss 1768.376220703125
Current State,action,reward,Response time,Next State:  (9, 15.950694610794756) 2 21.0 1258.0576862 (15.828704162850809, 9)
loss 3284.1357421875
Current State,action,reward,Response time,Next State:  (9, 15.828704162850809) 4 19.0 1251.67208827 (15.550833128512703, 11)
loss 1735.8818359375
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 2 19.0 1189.84140354 (15.446694946204717, 11)
loss 2513.437744140625
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 1 20.0 1184.33851965 (15.750501603468638, 10)
loss 751.342529296875
Action +2 not possible so Scaled up by 1
Current State,action,reward,Response time,Next State:  (10, 15.750501603468638) 4 19.0 1213.1314661 (15.817158911312735, 11)
loss 1529.9483642578125
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 1 20.0 1203.91462651 (15.829956988360925, 10)
loss 2493.816162109375
Current State,action,reward,Response time,Next State:  (10, 15.829956988360925) 3 19.0 1217.34610485 (15.892373986997768, 11)
loss 1712.0560302734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 1744.509033203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 2772.702880859375
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 4 19.0 1213.81868812 (16.017694914042416, 11)
loss 1942.166259765625
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 4 19.0 1214.51137704 (15.947547279389703, 11)
loss 845.245849609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 414.1003112792969
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 4 19.0 1219.63501821 (16.147078378791146, 11)
loss 2973.573486328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 1099.8804931640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 1132.2642822265625
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 2 19.0 1229.17115431 (16.667936385136993, 11)
loss 1296.1385498046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 1181.64599609375
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 0 21.0 1257.77263112 (16.845818065953559, 9)
loss 680.2448120117188
Current State,action,reward,Response time,Next State:  (9, 16.845818065953559) 3 20.0 1304.91298164 (17.052961248403161, 10)
loss 1743.8399658203125
Current State,action,reward,Response time,Next State:  (10, 17.052961248403161) 3 19.0 1282.21925533 (17.215992726625572, 11)
loss 972.9724731445312
############ Running episode number: 40  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 1678.2581787109375
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 1 20.0 1000.80824137 (11.786394321941378, 10)
loss 408.2114562988281
Current State,action,reward,Response time,Next State:  (10, 11.786394321941378) 3 19.0 1002.85899476 (11.61852219546234, 11)
loss 1436.793701171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 574.9566650390625
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 2 19.0 974.154538113 (11.336751742492702, 11)
loss 2285.541015625
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 1 20.0 967.160346038 (11.25610796929319, 10)
loss 1743.1591796875
Current State,action,reward,Response time,Next State:  (10, 11.25610796929319) 3 19.0 974.730436685 (11.027107764209074, 11)
loss 2111.841064453125
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 2 19.0 950.798097136 (10.995673623987257, 11)
loss 786.1774291992188
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 1 20.0 949.137050055 (10.931193889570471, 10)
loss 2840.734619140625
Current State,action,reward,Response time,Next State:  (10, 10.931193889570471) 3 19.0 957.495664348 (10.816918347608043, 11)
loss 2132.04296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 1793.10888671875
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 1 20.0 939.812260006 (10.768325938188134, 10)
loss 1770.9747314453125
Current State,action,reward,Response time,Next State:  (10, 10.768325938188134) 3 19.0 948.856481751 (10.772009508959538, 11)
loss 1871.034912109375
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 0 21.0 937.318160694 (10.644925616761762, 9)
loss 1959.48046875
Current State,action,reward,Response time,Next State:  (9, 10.644925616761762) 3 20.0 980.32686333 (10.58735855349979, 10)
loss 1669.7950439453125
Current State,action,reward,Response time,Next State:  (10, 10.58735855349979) 0 22.0 939.257231149 (10.552868829802469, 8)
loss 1804.428955078125
Current State,action,reward,Response time,Next State:  (8, 10.552868829802469) 0 24.0 987.80373542 (10.553846649940214, 6)
loss 1756.1060791015625
Current State,action,reward,Response time,Next State:  (6, 10.553846649940214) 3 23.0 1095.42085229 (10.489125480251131, 7)
loss 474.2875671386719
Current State,action,reward,Response time,Next State:  (7, 10.489125480251131) 3 22.0 1029.47716098 (10.448897752470936, 8)
loss 1703.93017578125
Current State,action,reward,Response time,Next State:  (8, 10.448897752470936) 3 21.0 981.727167119 (10.433149880183072, 9)
loss 417.80023193359375
Current State,action,reward,Response time,Next State:  (9, 10.433149880183072) 3 20.0 969.241448633 (10.44185150623065, 10)
loss 403.2714538574219
Current State,action,reward,Response time,Next State:  (10, 10.44185150623065) 1 21.0 931.538941947 (10.370942817486826, 9)
loss 1364.0853271484375
Current State,action,reward,Response time,Next State:  (9, 10.370942817486826) 3 20.0 965.985215893 (10.42733414151318, 10)
loss 910.9550170898438
Current State,action,reward,Response time,Next State:  (10, 10.42733414151318) 0 22.0 930.768881517 (10.388469398680568, 8)
loss 608.8076782226562
Current State,action,reward,Response time,Next State:  (8, 10.388469398680568) 1 23.0 978.19544437 (10.344006106602812, 7)
loss 906.4387817382812
Current State,action,reward,Response time,Next State:  (7, 10.344006106602812) 0 25.0 1020.43379601 (10.319026962956018, 5)
loss 2010.563720703125
Current State,action,reward,Response time,Next State:  (5, 10.319026962956018) 1 26.0 1205.69991592 (10.30224719189987, 4)
loss 1467.5023193359375
Current State,action,reward,Response time,Next State:  (4, 10.30224719189987) 3 25.0 1355.45037786 (10.278181486298042, 5)
loss 2730.712646484375
Current State,action,reward,Response time,Next State:  (5, 10.278181486298042) 0 27.0 1201.9469428 (10.268274366284802, 3)
loss 434.0889587402344
Current State,action,reward,Response time,Next State:  (3, 10.268274366284802) 2 27.0 1919.89746459 (10.335411397720526, 3)
loss 820.4138793945312
Current State,action,reward,Response time,Next State:  (3, 10.335411397720526) 3 26.0 1933.10138715 (10.305649118067803, 4)
loss 699.6343383789062
Current State,action,reward,Response time,Next State:  (4, 10.305649118067803) 3 25.0 1355.83685161 (10.24826025489064, 5)
loss 1812.0557861328125
Current State,action,reward,Response time,Next State:  (5, 10.24826025489064) 1 26.0 1199.19771361 (10.276491935146446, 4)
loss 755.1008911132812
Current State,action,reward,Response time,Next State:  (4, 10.276491935146446) 4 24.0 1352.52446763 (10.236991269871366, 6)
loss 636.1692504882812
Current State,action,reward,Response time,Next State:  (6, 10.236991269871366) 3 23.0 1074.46432072 (10.236272697871373, 7)
loss 1505.631591796875
Current State,action,reward,Response time,Next State:  (7, 10.236272697871373) 4 21.0 1013.72020216 (10.369891240151098, 9)
loss 677.3522338867188
Current State,action,reward,Response time,Next State:  (9, 10.369891240151098) 3 20.0 965.930171009 (10.316955310454549, 10)
loss 1671.983154296875
Current State,action,reward,Response time,Next State:  (10, 10.316955310454549) 3 19.0 924.913936648 (10.333617326102203, 11)
loss 1592.9725341796875
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 1 20.0 914.152581784 (10.390165524255663, 10)
loss 695.2918090820312
Current State,action,reward,Response time,Next State:  (10, 10.390165524255663) 0 22.0 928.797305964 (10.425974763084863, 8)
loss 778.4793701171875
Current State,action,reward,Response time,Next State:  (8, 10.425974763084863) 0 24.0 980.387437704 (10.546025383098053, 6)
loss 1931.8839111328125
Current State,action,reward,Response time,Next State:  (6, 10.546025383098053) 1 25.0 1094.90356069 (10.655373370049301, 5)
loss 1571.0543212890625
Current State,action,reward,Response time,Next State:  (5, 10.655373370049301) 3 24.0 1236.60417082 (10.624473674922116, 6)
loss 436.17144775390625
Current State,action,reward,Response time,Next State:  (6, 10.624473674922116) 4 22.0 1100.09206058 (10.771376986314287, 8)
loss 821.5048217773438
Current State,action,reward,Response time,Next State:  (8, 10.771376986314287) 4 20.0 1000.57439983 (10.924797168745895, 10)
loss 618.29052734375
Current State,action,reward,Response time,Next State:  (10, 10.924797168745895) 3 19.0 957.1563561 (11.039747673816453, 11)
loss 1640.07177734375
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 2 19.0 951.466016946 (11.271571944085663, 11)
loss 1493.113525390625
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 1 20.0 963.716106332 (11.670334358779868, 10)
loss 951.2095947265625
Current State,action,reward,Response time,Next State:  (10, 11.670334358779868) 2 20.0 996.702699398 (11.819721938468785, 10)
loss 2803.81591796875
Current State,action,reward,Response time,Next State:  (10, 11.819721938468785) 1 21.0 1004.62682792 (12.19918626616789, 9)
loss 487.0557556152344
Current State,action,reward,Response time,Next State:  (9, 12.19918626616789) 3 20.0 1061.68473805 (12.501496275411796, 10)
loss 1735.76025390625
Action +2 not possible so Scaled up by 1
Current State,action,reward,Response time,Next State:  (10, 12.501496275411796) 4 19.0 1040.79092857 (13.168618569876575, 11)
loss 1525.7535400390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 615.7933349609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 735.8928833007812
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 2 19.0 1122.88439768 (14.677479537099185, 11)
loss 616.083251953125
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 1 20.0 1143.69153516 (15.353965082180355, 10)
loss 806.8319091796875
Current State,action,reward,Response time,Next State:  (10, 15.353965082180355) 3 19.0 1192.09754638 (15.836943704090487, 11)
loss 411.1871337890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 2527.345458984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 1992.6075439453125
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 0 21.0 1259.63387034 (17.534967586021782, 9)
loss 1798.26025390625
Current State,action,reward,Response time,Next State:  (9, 17.534967586021782) 3 20.0 1340.98655806 (17.669285735563751, 10)
loss 934.4326171875
Current State,action,reward,Response time,Next State:  (10, 17.669285735563751) 0 22.0 1314.91162813 (17.944480812078613, 8)
loss 1542.4267578125
Current State,action,reward,Response time,Next State:  (8, 17.944480812078613) 3 21.0 1419.80498244 (18.385807405229915, 9)
loss 1035.2572021484375
Current State,action,reward,Response time,Next State:  (9, 18.385807405229915) 1 22.0 1385.5238237 (18.671267839956315, 8)
loss 676.281982421875
Current State,action,reward,Response time,Next State:  (8, 18.671267839956315) 2 22.0 1462.2819013 (19.02839494033929, 8)
loss 648.9189453125
Current State,action,reward,Response time,Next State:  (8, 19.02839494033929) 2 22.0 1483.15412147 (19.286321916040979, 8)
loss 689.226318359375
Current State,action,reward,Response time,Next State:  (8, 19.286321916040979) 3 21.0 1498.22861069 (19.340464848017284, 9)
loss 2216.330322265625
Current State,action,reward,Response time,Next State:  (9, 19.340464848017284) 1 22.0 1435.4954296 (19.213467265587269, 8)
loss 713.3515625
Current State,action,reward,Response time,Next State:  (8, 19.213467265587269) 3 21.0 1493.97063558 (19.140765783401285, 9)
loss 1091.487548828125
Current State,action,reward,Response time,Next State:  (9, 19.140765783401285) 3 20.0 1425.04216908 (19.385636054792762, 10)
loss 1724.6697998046875
Current State,action,reward,Response time,Next State:  (10, 19.385636054792762) 2 20.0 1405.95387237 (19.223969507401588, 10)
loss 406.8419494628906
Action +2 not possible so Scaled up by 1
Current State,action,reward,Response time,Next State:  (10, 19.223969507401588) 4 19.0 1397.37841716 (19.25591252280865, 11)
loss 1571.717041015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 727.4566650390625
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 1 20.0 1376.52055872 (18.668181536495972, 10)
loss 1086.461669921875
Current State,action,reward,Response time,Next State:  (10, 18.668181536495972) 3 19.0 1367.89714889 (18.375894992990247, 11)
loss 994.1620483398438
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 497.6871337890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 795.0521850585938
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 2 19.0 1278.56065924 (16.84211602880065, 11)
loss 1721.181396484375
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 2 19.0 1258.07554888 (16.237094554670044, 11)
loss 416.61865234375
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 4 19.0 1226.10492247 (15.950694610794756, 11)
loss 1885.723876953125
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 0 21.0 1210.97093797 (15.828704162850809, 9)
loss 423.11932373046875
Current State,action,reward,Response time,Next State:  (9, 15.828704162850809) 1 22.0 1251.67208827 (15.550833128512703, 8)
loss 574.1983032226562
Current State,action,reward,Response time,Next State:  (8, 15.550833128512703) 3 21.0 1279.90873428 (15.446694946204717, 9)
loss 630.6756591796875
Current State,action,reward,Response time,Next State:  (9, 15.446694946204717) 3 20.0 1231.67579099 (15.750501603468638, 10)
loss 2592.811279296875
Current State,action,reward,Response time,Next State:  (10, 15.750501603468638) 3 19.0 1213.1314661 (15.817158911312735, 11)
loss 638.952392578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 2583.71533203125
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 1 20.0 1204.59090422 (15.892373986997768, 10)
loss 414.7826843261719
Current State,action,reward,Response time,Next State:  (10, 15.892373986997768) 1 21.0 1220.65695786 (15.954793861767499, 9)
loss 858.7381591796875
Current State,action,reward,Response time,Next State:  (9, 15.954793861767499) 3 20.0 1258.27226176 (16.004586266677634, 10)
loss 610.2186889648438
Action +2 not possible so Scaled up by 1
Current State,action,reward,Response time,Next State:  (10, 16.004586266677634) 4 19.0 1226.60915635 (16.017694914042416, 11)
loss 615.1957397460938
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 1 20.0 1214.51137704 (15.947547279389703, 10)
loss 697.5274047851562
Action +2 not possible so Scaled up by 1
Current State,action,reward,Response time,Next State:  (10, 15.947547279389703) 4 19.0 1223.58357506 (16.11465619633363, 11)
loss 2102.026611328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 2706.9765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 2342.8408203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 1789.9903564453125
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 0 21.0 1229.17115431 (16.667936385136993, 9)
loss 875.1141967773438
Current State,action,reward,Response time,Next State:  (9, 16.667936385136993) 4 19.0 1295.6017535 (16.836383524612351, 11)
loss 1545.889404296875
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 0 21.0 1257.77263112 (16.845818065953559, 9)
loss 1545.6278076171875
Current State,action,reward,Response time,Next State:  (9, 16.845818065953559) 3 20.0 1304.91298164 (17.052961248403161, 10)
loss 1483.1357421875
Current State,action,reward,Response time,Next State:  (10, 17.052961248403161) 2 20.0 1282.21925533 (17.215992726625572, 10)
loss 1522.71142578125
############ Running episode number: 41  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 2063.306884765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 630.4749755859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 2770.049560546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 535.1655883789062
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 1294.5867919921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 1218.4969482421875
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 1 20.0 962.898956888 (11.027107764209074, 10)
loss 3020.82958984375
Current State,action,reward,Response time,Next State:  (10, 11.027107764209074) 1 21.0 962.583328739 (10.995673623987257, 9)
loss 728.7684326171875
Current State,action,reward,Response time,Next State:  (9, 10.995673623987257) 3 20.0 998.686790566 (10.931193889570471, 10)
loss 414.0894775390625
Current State,action,reward,Response time,Next State:  (10, 10.931193889570471) 2 20.0 957.495664348 (10.816918347608043, 10)
loss 896.4091796875
Current State,action,reward,Response time,Next State:  (10, 10.816918347608043) 3 19.0 951.434021987 (10.819208572963639, 11)
loss 1900.808837890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 1662.6505126953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 802.7788696289062
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 0 21.0 937.318160694 (10.644925616761762, 9)
loss 421.14959716796875
Current State,action,reward,Response time,Next State:  (9, 10.644925616761762) 3 20.0 980.32686333 (10.58735855349979, 10)
loss 878.3540649414062
Current State,action,reward,Response time,Next State:  (10, 10.58735855349979) 1 21.0 939.257231149 (10.552868829802469, 9)
loss 1380.2548828125
Current State,action,reward,Response time,Next State:  (9, 10.552868829802469) 4 19.0 975.508144832 (10.553846649940214, 11)
loss 570.7099609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 517.1514282226562
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 1738.491455078125
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 2 19.0 920.244245637 (10.433149880183072, 11)
loss 1315.893798828125
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 1 20.0 919.412094444 (10.44185150623065, 10)
loss 1748.0460205078125
Current State,action,reward,Response time,Next State:  (10, 10.44185150623065) 0 22.0 931.538941947 (10.370942817486826, 8)
loss 2886.82568359375
Current State,action,reward,Response time,Next State:  (8, 10.370942817486826) 3 21.0 977.171106925 (10.42733414151318, 9)
loss 596.7174682617188
Current State,action,reward,Response time,Next State:  (9, 10.42733414151318) 1 22.0 968.937023414 (10.388469398680568, 8)
loss 517.71875
Current State,action,reward,Response time,Next State:  (8, 10.388469398680568) 3 21.0 978.19544437 (10.344006106602812, 9)
loss 699.7973022460938
Current State,action,reward,Response time,Next State:  (9, 10.344006106602812) 0 23.0 964.575212011 (10.319026962956018, 7)
loss 545.31005859375
Current State,action,reward,Response time,Next State:  (7, 10.319026962956018) 2 23.0 1018.87717743 (10.30224719189987, 7)
loss 2667.708251953125
Current State,action,reward,Response time,Next State:  (7, 10.30224719189987) 3 22.0 1017.83151695 (10.278181486298042, 8)
loss 422.2846984863281
Current State,action,reward,Response time,Next State:  (8, 10.278181486298042) 2 22.0 971.749689939 (10.268274366284802, 8)
loss 1732.0791015625
Current State,action,reward,Response time,Next State:  (8, 10.268274366284802) 3 21.0 971.170670341 (10.335411397720526, 9)
loss 425.96527099609375
Current State,action,reward,Response time,Next State:  (9, 10.335411397720526) 3 20.0 964.125321415 (10.305649118067803, 10)
loss 1358.1624755859375
Current State,action,reward,Response time,Next State:  (10, 10.305649118067803) 0 22.0 924.314209939 (10.24826025489064, 8)
loss 625.78173828125
Current State,action,reward,Response time,Next State:  (8, 10.24826025489064) 3 21.0 970.000949704 (10.276491935146446, 9)
loss 450.1714782714844
Current State,action,reward,Response time,Next State:  (9, 10.276491935146446) 4 19.0 961.041178317 (10.236991269871366, 11)
loss 1654.8165283203125
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 0 21.0 909.046654676 (10.236272697871373, 9)
loss 2191.107421875
Current State,action,reward,Response time,Next State:  (9, 10.236272697871373) 1 22.0 958.935899728 (10.369891240151098, 8)
loss 469.3463134765625
Current State,action,reward,Response time,Next State:  (8, 10.369891240151098) 3 21.0 977.109647703 (10.316955310454549, 9)
loss 1542.7303466796875
Current State,action,reward,Response time,Next State:  (9, 10.316955310454549) 0 23.0 963.159236328 (10.333617326102203, 7)
loss 1976.2042236328125
Current State,action,reward,Response time,Next State:  (7, 10.333617326102203) 3 22.0 1019.78640117 (10.390165524255663, 8)
loss 425.5901184082031
Current State,action,reward,Response time,Next State:  (8, 10.390165524255663) 3 21.0 978.294574081 (10.425974763084863, 9)
loss 925.9677124023438
Current State,action,reward,Response time,Next State:  (9, 10.425974763084863) 1 22.0 968.865866662 (10.546025383098053, 8)
loss 587.6997680664062
Current State,action,reward,Response time,Next State:  (8, 10.546025383098053) 3 21.0 987.40377158 (10.655373370049301, 9)
loss 408.5383605957031
Current State,action,reward,Response time,Next State:  (9, 10.655373370049301) 3 20.0 980.873751654 (10.624473674922116, 10)
loss 1814.805419921875
Current State,action,reward,Response time,Next State:  (10, 10.624473674922116) 0 22.0 941.225969064 (10.771376986314287, 8)
loss 1134.386962890625
Current State,action,reward,Response time,Next State:  (8, 10.771376986314287) 3 21.0 1000.57439983 (10.924797168745895, 9)
loss 1775.3199462890625
Current State,action,reward,Response time,Next State:  (9, 10.924797168745895) 4 19.0 994.97675791 (11.039747673816453, 11)
loss 2262.51904296875
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 0 21.0 951.466016946 (11.271571944085663, 9)
loss 2560.29833984375
Current State,action,reward,Response time,Next State:  (9, 11.271571944085663) 0 23.0 1013.12870607 (11.670334358779868, 7)
loss 599.7797241210938
Current State,action,reward,Response time,Next State:  (7, 11.670334358779868) 0 25.0 1103.08623692 (11.819721938468785, 5)
loss 1464.0101318359375
Current State,action,reward,Response time,Next State:  (5, 11.819721938468785) 3 24.0 1343.58710331 (12.19918626616789, 6)
loss 3417.247802734375
Current State,action,reward,Response time,Next State:  (6, 12.19918626616789) 0 26.0 1204.24214357 (12.501496275411796, 4)
loss 662.3804321289062
Current State,action,reward,Response time,Next State:  (4, 12.501496275411796) 1 -22.0311726538 1605.29472846 (13.168618569876575, 3)
loss 1078.18310546875
Current State,action,reward,Response time,Next State:  (3, 13.168618569876575) 3 26.0 2490.31172654 (13.649658108197247, 4)
loss 2284.410400390625
Current State,action,reward,Response time,Next State:  (4, 13.649658108197247) 3 25.0 1735.73095309 (14.283719188889453, 5)
loss 2851.39501953125
Current State,action,reward,Response time,Next State:  (5, 14.283719188889453) 3 24.0 1569.98464322 (14.677479537099185, 6)
loss 1858.748779296875
Current State,action,reward,Response time,Next State:  (6, 14.677479537099185) 3 23.0 1368.15424492 (15.353965082180355, 7)
loss 1203.6473388671875
Current State,action,reward,Response time,Next State:  (7, 15.353965082180355) 3 22.0 1332.63806181 (15.836943704090487, 8)
loss 547.6680908203125
Current State,action,reward,Response time,Next State:  (8, 15.836943704090487) 0 24.0 1296.63040821 (16.466876895473597, 6)
loss 2714.55908203125
Current State,action,reward,Response time,Next State:  (6, 16.466876895473597) 3 23.0 1486.50338648 (16.871606159345866, 7)
loss 677.9342651367188
Current State,action,reward,Response time,Next State:  (7, 16.871606159345866) 1 24.0 1427.21249257 (17.534967586021782, 6)
loss 1103.6640625
Current State,action,reward,Response time,Next State:  (6, 17.534967586021782) 0 6.76217022756 1557.14594988 (17.669285735563751, 4)
loss 1537.301513671875
Current State,action,reward,Response time,Next State:  (4, 17.669285735563751) 1 -115.958642802 2192.37829772 (17.944480812078613, 3)
loss 2132.8388671875
Current State,action,reward,Response time,Next State:  (3, 17.944480812078613) 3 -1.37783049286 3429.58642802 (18.385807405229915, 4)
loss 797.0451049804688
Current State,action,reward,Response time,Next State:  (4, 18.385807405229915) 4 24.0 2273.77830493 (18.671267839956315, 6)
loss 1499.199951171875
Current State,action,reward,Response time,Next State:  (6, 18.671267839956315) 3 23.0 1632.29983282 (19.02839494033929, 7)
loss 1542.930908203125
Current State,action,reward,Response time,Next State:  (7, 19.02839494033929) 3 22.0 1561.61651886 (19.286321916040979, 8)
loss 728.54052734375
Current State,action,reward,Response time,Next State:  (8, 19.286321916040979) 0 24.0 1498.22861069 (19.340464848017284, 6)
loss 422.3760681152344
Current State,action,reward,Response time,Next State:  (6, 19.340464848017284) 4 22.0 1676.55992467 (19.213467265587269, 8)
loss 871.2020263671875
Current State,action,reward,Response time,Next State:  (8, 19.213467265587269) 3 21.0 1493.97063558 (19.140765783401285, 9)
loss 2084.088623046875
Current State,action,reward,Response time,Next State:  (9, 19.140765783401285) 3 20.0 1425.04216908 (19.385636054792762, 10)
loss 1736.412353515625
Current State,action,reward,Response time,Next State:  (10, 19.385636054792762) 3 19.0 1405.95387237 (19.223969507401588, 11)
loss 909.5154418945312
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 2 19.0 1383.93777195 (19.25591252280865, 11)
loss 2224.44482421875
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 1 20.0 1385.62570908 (19.08360399753829, 10)
loss 605.6201171875
Current State,action,reward,Response time,Next State:  (10, 19.08360399753829) 3 19.0 1389.93285614 (18.668181536495972, 11)
loss 1070.7347412109375
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 4 19.0 1354.56874896 (18.375894992990247, 11)
loss 1486.2042236328125
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 0 21.0 1339.12370397 (17.82724819986867, 9)
loss 823.8070678710938
Current State,action,reward,Response time,Next State:  (9, 17.82724819986867) 3 20.0 1356.28600579 (17.229782241685768, 10)
loss 603.7156982421875
Current State,action,reward,Response time,Next State:  (10, 17.229782241685768) 2 20.0 1291.59856437 (16.84211602880065, 10)
loss 606.9359130859375
Current State,action,reward,Response time,Next State:  (10, 16.84211602880065) 2 20.0 1271.03516211 (16.237094554670044, 10)
loss 1549.621826171875
Current State,action,reward,Response time,Next State:  (10, 16.237094554670044) 1 21.0 1238.94234737 (15.950694610794756, 9)
loss 613.3843994140625
Current State,action,reward,Response time,Next State:  (9, 15.950694610794756) 0 23.0 1258.0576862 (15.828704162850809, 7)
loss 418.79296875
Current State,action,reward,Response time,Next State:  (7, 15.828704162850809) 1 24.0 1362.22224939 (15.550833128512703, 6)
loss 592.4605712890625
Current State,action,reward,Response time,Next State:  (6, 15.550833128512703) 1 25.0 1425.91707068 (15.446694946204717, 5)
loss 800.43359375
Current State,action,reward,Response time,Next State:  (5, 15.446694946204717) 1 26.0 1676.84143877 (15.750501603468638, 4)
loss 604.5551147460938
Current State,action,reward,Response time,Next State:  (4, 15.750501603468638) 4 24.0 1974.39599686 (15.817158911312735, 6)
loss 565.1466674804688
Current State,action,reward,Response time,Next State:  (6, 15.817158911312735) 3 23.0 1443.53161985 (15.829956988360925, 7)
loss 1715.02392578125
Current State,action,reward,Response time,Next State:  (7, 15.829956988360925) 1 24.0 1362.30032139 (15.892373986997768, 6)
loss 650.1615600585938
Current State,action,reward,Response time,Next State:  (6, 15.892373986997768) 3 23.0 1448.50627772 (15.954793861767499, 7)
loss 1742.4521484375
Current State,action,reward,Response time,Next State:  (7, 15.954793861767499) 3 22.0 1370.07974724 (16.004586266677634, 8)
loss 649.9349365234375
Current State,action,reward,Response time,Next State:  (8, 16.004586266677634) 1 23.0 1306.42824342 (16.017694914042416, 7)
loss 1495.58349609375
Current State,action,reward,Response time,Next State:  (7, 16.017694914042416) 0 25.0 1373.9995352 (15.947547279389703, 5)
loss 2343.647705078125
Current State,action,reward,Response time,Next State:  (5, 15.947547279389703) 3 24.0 1722.86086353 (16.11465619633363, 6)
loss 576.3152465820312
Current State,action,reward,Response time,Next State:  (6, 16.11465619633363) 3 23.0 1463.20782432 (16.147078378791146, 7)
loss 627.909912109375
Current State,action,reward,Response time,Next State:  (7, 16.147078378791146) 0 25.0 1382.06228977 (16.229253414601111, 5)
loss 637.3929443359375
Current State,action,reward,Response time,Next State:  (5, 16.229253414601111) 4 23.0 1748.74464891 (16.295120821876548, 7)
loss 2670.573974609375
Current State,action,reward,Response time,Next State:  (7, 16.295120821876548) 4 21.0 1391.28781087 (16.667936385136993, 9)
loss 2661.4921875
Current State,action,reward,Response time,Next State:  (9, 16.667936385136993) 3 20.0 1295.6017535 (16.836383524612351, 10)
loss 2580.737548828125
Current State,action,reward,Response time,Next State:  (10, 16.836383524612351) 1 21.0 1270.73108663 (16.845818065953559, 9)
loss 1295.1466064453125
Current State,action,reward,Response time,Next State:  (9, 16.845818065953559) 0 23.0 1304.91298164 (17.052961248403161, 7)
loss 857.5263671875
Current State,action,reward,Response time,Next State:  (7, 17.052961248403161) 4 21.0 1438.51394887 (17.215992726625572, 9)
loss 1067.4002685546875
############ Running episode number: 42  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 0 22.0 1029.06659875 (11.973514343585284, 8)
loss 1372.34619140625
Current State,action,reward,Response time,Next State:  (8, 11.973514343585284) 0 24.0 1070.83307124 (11.786394321941378, 6)
loss 1278.1383056640625
Current State,action,reward,Response time,Next State:  (6, 11.786394321941378) 0 26.0 1176.94045342 (11.61852219546234, 4)
loss 757.8161010742188
Current State,action,reward,Response time,Next State:  (4, 11.61852219546234) 4 24.0 1504.98499672 (11.469111876584304, 6)
loss 2599.782470703125
Current State,action,reward,Response time,Next State:  (6, 11.469111876584304) 3 23.0 1155.95567613 (11.336751742492702, 7)
loss 2094.302978515625
Current State,action,reward,Response time,Next State:  (7, 11.336751742492702) 4 21.0 1082.29845875 (11.25610796929319, 9)
loss 1627.5450439453125
Current State,action,reward,Response time,Next State:  (9, 11.25610796929319) 3 20.0 1012.3192433 (11.027107764209074, 10)
loss 906.5510864257812
Current State,action,reward,Response time,Next State:  (10, 11.027107764209074) 2 20.0 962.583328739 (10.995673623987257, 10)
loss 625.8147583007812
Current State,action,reward,Response time,Next State:  (10, 10.995673623987257) 2 20.0 960.915933313 (10.931193889570471, 10)
loss 881.7793579101562
Current State,action,reward,Response time,Next State:  (10, 10.931193889570471) 0 22.0 957.495664348 (10.816918347608043, 8)
loss 1395.30224609375
Current State,action,reward,Response time,Next State:  (8, 10.816918347608043) 1 23.0 1003.23605536 (10.819208572963639, 7)
loss 880.0126953125
Current State,action,reward,Response time,Next State:  (7, 10.819208572963639) 0 25.0 1050.04686027 (10.768325938188134, 5)
loss 760.5449829101562
Current State,action,reward,Response time,Next State:  (5, 10.768325938188134) 2 25.0 1246.98250365 (10.772009508959538, 5)
loss 528.4411010742188
Current State,action,reward,Response time,Next State:  (5, 10.772009508959538) 1 26.0 1247.32095832 (10.644925616761762, 4)
loss 962.3715209960938
Current State,action,reward,Response time,Next State:  (4, 10.644925616761762) 2 26.0 1394.38015129 (10.58735855349979, 4)
loss 1675.0767822265625
Current State,action,reward,Response time,Next State:  (4, 10.58735855349979) 2 26.0 1387.84028018 (10.552868829802469, 4)
loss 2809.42578125
Current State,action,reward,Response time,Next State:  (4, 10.552868829802469) 3 25.0 1383.92209616 (10.553846649940214, 5)
loss 696.106689453125
Current State,action,reward,Response time,Next State:  (5, 10.553846649940214) 0 27.0 1227.27567029 (10.489125480251131, 3)
loss 579.9691162109375
Current State,action,reward,Response time,Next State:  (3, 10.489125480251131) 4 25.0 1963.33252483 (10.448897752470936, 5)
loss 1067.4219970703125
Current State,action,reward,Response time,Next State:  (5, 10.448897752470936) 1 26.0 1217.6327325 (10.433149880183072, 4)
loss 914.6817016601562
Current State,action,reward,Response time,Next State:  (4, 10.433149880183072) 1 27.0 1370.32149792 (10.44185150623065, 3)
loss 1617.62841796875
Current State,action,reward,Response time,Next State:  (3, 10.44185150623065) 3 26.0 1954.03509421 (10.370942817486826, 4)
loss 2056.787841796875
Current State,action,reward,Response time,Next State:  (4, 10.370942817486826) 3 25.0 1363.25450251 (10.42733414151318, 5)
loss 1893.3662109375
Current State,action,reward,Response time,Next State:  (5, 10.42733414151318) 3 24.0 1215.65142003 (10.388469398680568, 6)
loss 702.3452758789062
Current State,action,reward,Response time,Next State:  (6, 10.388469398680568) 1 25.0 1084.48294874 (10.344006106602812, 5)
loss 627.5028686523438
Current State,action,reward,Response time,Next State:  (5, 10.344006106602812) 3 24.0 1207.99505511 (10.319026962956018, 6)
loss 1495.9459228515625
Current State,action,reward,Response time,Next State:  (6, 10.319026962956018) 3 23.0 1079.89008812 (10.30224719189987, 7)
loss 2695.724609375
Current State,action,reward,Response time,Next State:  (7, 10.30224719189987) 3 22.0 1017.83151695 (10.278181486298042, 8)
loss 1093.87841796875
Current State,action,reward,Response time,Next State:  (8, 10.278181486298042) 0 24.0 971.749689939 (10.268274366284802, 6)
loss 1675.667236328125
Current State,action,reward,Response time,Next State:  (6, 10.268274366284802) 3 23.0 1076.53335675 (10.335411397720526, 7)
loss 719.9010009765625
Current State,action,reward,Response time,Next State:  (7, 10.335411397720526) 0 25.0 1019.89820185 (10.305649118067803, 5)
loss 1659.9305419921875
Current State,action,reward,Response time,Next State:  (5, 10.305649118067803) 4 23.0 1204.47072981 (10.24826025489064, 7)
loss 520.3268432617188
Current State,action,reward,Response time,Next State:  (7, 10.24826025489064) 1 24.0 1014.46722752 (10.276491935146446, 6)
loss 2081.152099609375
Current State,action,reward,Response time,Next State:  (6, 10.276491935146446) 0 26.0 1077.07685941 (10.236991269871366, 4)
loss 1907.6287841796875
Current State,action,reward,Response time,Next State:  (4, 10.236991269871366) 3 25.0 1348.03701865 (10.236272697871373, 5)
loss 1367.7908935546875
Current State,action,reward,Response time,Next State:  (5, 10.236272697871373) 1 26.0 1198.09627024 (10.369891240151098, 4)
loss 2864.1767578125
Current State,action,reward,Response time,Next State:  (4, 10.369891240151098) 2 26.0 1363.1350387 (10.316955310454549, 4)
loss 456.8741149902344
Current State,action,reward,Response time,Next State:  (4, 10.316955310454549) 3 25.0 1357.1212847 (10.333617326102203, 5)
loss 656.7581787109375
Current State,action,reward,Response time,Next State:  (5, 10.333617326102203) 1 26.0 1207.04051089 (10.390165524255663, 4)
loss 1140.313232421875
Current State,action,reward,Response time,Next State:  (4, 10.390165524255663) 0 -62.3483664008 1365.43828638 (10.425974763084863, 2)
loss 653.3658447265625
Current State,action,reward,Response time,Next State:  (2, 10.425974763084863) 3 27.0 2903.48366401 (10.546025383098053, 3)
loss 1618.3802490234375
Current State,action,reward,Response time,Next State:  (3, 10.546025383098053) 4 25.0 1974.52309871 (10.655373370049301, 5)
loss 604.1593627929688
Current State,action,reward,Response time,Next State:  (5, 10.655373370049301) 3 24.0 1236.60417082 (10.624473674922116, 6)
loss 642.82861328125
Current State,action,reward,Response time,Next State:  (6, 10.624473674922116) 3 23.0 1100.09206058 (10.771376986314287, 7)
loss 629.739013671875
Current State,action,reward,Response time,Next State:  (7, 10.771376986314287) 1 24.0 1047.06615216 (10.924797168745895, 6)
loss 1542.9224853515625
Current State,action,reward,Response time,Next State:  (6, 10.924797168745895) 3 23.0 1119.95518797 (11.039747673816453, 7)
loss 3469.676513671875
Current State,action,reward,Response time,Next State:  (7, 11.039747673816453) 3 22.0 1063.7901361 (11.271571944085663, 8)
loss 2070.705322265625
Current State,action,reward,Response time,Next State:  (8, 11.271571944085663) 0 24.0 1029.8081916 (11.670334358779868, 6)
loss 925.2747802734375
Current State,action,reward,Response time,Next State:  (6, 11.670334358779868) 3 23.0 1169.26435121 (11.819721938468785, 7)
loss 1624.9716796875
Current State,action,reward,Response time,Next State:  (7, 11.819721938468785) 4 21.0 1112.39558253 (12.19918626616789, 9)
loss 841.801025390625
Current State,action,reward,Response time,Next State:  (9, 12.19918626616789) 3 20.0 1061.68473805 (12.501496275411796, 10)
loss 1595.5714111328125
Current State,action,reward,Response time,Next State:  (10, 12.501496275411796) 2 20.0 1040.79092857 (13.168618569876575, 10)
loss 1058.212890625
Current State,action,reward,Response time,Next State:  (10, 13.168618569876575) 3 19.0 1076.17782493 (13.649658108197247, 11)
loss 811.7005615234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 857.0328369140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 452.845947265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 3090.097412109375
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 4 19.0 1179.43847566 (15.836943704090487, 11)
loss 799.74462890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 422.9932556152344
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 4 19.0 1238.24711194 (16.871606159345866, 11)
loss 438.9039001464844
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 4 19.0 1259.63387034 (17.534967586021782, 11)
loss 617.3383178710938
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 4 19.0 1294.6873044 (17.669285735563751, 11)
loss 432.3052062988281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 2028.9654541015625
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 4 19.0 1316.32685758 (18.385807405229915, 11)
loss 2105.509765625
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 2 19.0 1339.64749699 (18.671267839956315, 11)
loss 751.5645141601562
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 0 21.0 1354.73183582 (19.02839494033929, 9)
loss 507.9330139160156
Current State,action,reward,Response time,Next State:  (9, 19.02839494033929) 4 19.0 1419.16011 (19.286321916040979, 11)
loss 562.1905517578125
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 2 19.0 1387.23260634 (19.340464848017284, 11)
loss 2912.2998046875
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 1 20.0 1390.09363446 (19.213467265587269, 10)
loss 638.5368041992188
Current State,action,reward,Response time,Next State:  (10, 19.213467265587269) 3 19.0 1396.82133527 (19.140765783401285, 11)
loss 434.9542236328125
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 1 20.0 1379.54110953 (19.385636054792762, 10)
loss 479.2079772949219
Current State,action,reward,Response time,Next State:  (10, 19.385636054792762) 2 20.0 1405.95387237 (19.223969507401588, 10)
loss 1035.5562744140625
Current State,action,reward,Response time,Next State:  (10, 19.223969507401588) 0 22.0 1397.37841716 (19.25591252280865, 8)
loss 498.7893981933594
Current State,action,reward,Response time,Next State:  (8, 19.25591252280865) 1 23.0 1496.45133993 (19.08360399753829, 7)
loss 2136.857421875
Current State,action,reward,Response time,Next State:  (7, 19.08360399753829) 4 21.0 1565.05696683 (18.668181536495972, 9)
loss 662.0474243164062
Current State,action,reward,Response time,Next State:  (9, 18.668181536495972) 3 20.0 1400.30471596 (18.375894992990247, 10)
loss 841.2357788085938
Action +2 not possible so Scaled up by 1
Current State,action,reward,Response time,Next State:  (10, 18.375894992990247) 4 19.0 1352.39307459 (17.82724819986867, 11)
loss 831.1799926757812
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 4 19.0 1310.13203606 (17.229782241685768, 11)
loss 1213.7767333984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 423.5977478027344
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 2 19.0 1258.07554888 (16.237094554670044, 11)
loss 782.7725830078125
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 4 19.0 1226.10492247 (15.950694610794756, 11)
loss 758.2008056640625
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 4 19.0 1210.97093797 (15.828704162850809, 11)
loss 1581.9237060546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 1065.63525390625
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 1 20.0 1189.84140354 (15.446694946204717, 10)
loss 599.433349609375
Current State,action,reward,Response time,Next State:  (10, 15.446694946204717) 1 21.0 1197.01631782 (15.750501603468638, 9)
loss 747.0399169921875
Current State,action,reward,Response time,Next State:  (9, 15.750501603468638) 3 20.0 1247.57857022 (15.817158911312735, 10)
loss 1484.97998046875
Current State,action,reward,Response time,Next State:  (10, 15.817158911312735) 3 19.0 1216.66724247 (15.829956988360925, 11)
loss 531.0714721679688
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 0 21.0 1204.59090422 (15.892373986997768, 9)
loss 1800.98095703125
Current State,action,reward,Response time,Next State:  (9, 15.892373986997768) 3 20.0 1255.00488935 (15.954793861767499, 10)
loss 1662.5257568359375
Action +2 not possible so Scaled up by 1
Current State,action,reward,Response time,Next State:  (10, 15.954793861767499) 4 19.0 1223.96796344 (16.004586266677634, 11)
loss 1439.137451171875
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 0 21.0 1213.81868812 (16.017694914042416, 9)
loss 1386.3951416015625
Current State,action,reward,Response time,Next State:  (9, 16.017694914042416) 0 23.0 1261.56482143 (15.947547279389703, 7)
loss 1193.901611328125
Current State,action,reward,Response time,Next State:  (7, 15.947547279389703) 1 24.0 1369.62816392 (16.11465619633363, 6)
loss 1781.9306640625
Current State,action,reward,Response time,Next State:  (6, 16.11465619633363) 0 24.0551140495 1463.20782432 (16.147078378791146, 4)
loss 1217.4571533203125
Current State,action,reward,Response time,Next State:  (4, 16.147078378791146) 3 25.0 2019.4488595 (16.229253414601111, 5)
loss 431.0208435058594
Current State,action,reward,Response time,Next State:  (5, 16.229253414601111) 4 23.0 1748.74464891 (16.295120821876548, 7)
loss 1648.4249267578125
Current State,action,reward,Response time,Next State:  (7, 16.295120821876548) 3 22.0 1391.28781087 (16.667936385136993, 8)
loss 4024.865478515625
Current State,action,reward,Response time,Next State:  (8, 16.667936385136993) 1 23.0 1345.1976051 (16.836383524612351, 7)
loss 531.3291625976562
Current State,action,reward,Response time,Next State:  (7, 16.836383524612351) 1 24.0 1425.01753312 (16.845818065953559, 6)
loss 2712.8388671875
Current State,action,reward,Response time,Next State:  (6, 16.845818065953559) 4 22.0 1511.56621672 (17.052961248403161, 8)
loss 1257.8289794921875
Current State,action,reward,Response time,Next State:  (8, 17.052961248403161) 4 20.0 1367.70030431 (17.215992726625572, 10)
loss 1577.9656982421875
############ Running episode number: 43  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 2 20.0 1029.06659875 (11.973514343585284, 10)
loss 631.7718505859375
Current State,action,reward,Response time,Next State:  (10, 11.973514343585284) 2 20.0 1012.7846064 (11.786394321941378, 10)
loss 728.0443725585938
Current State,action,reward,Response time,Next State:  (10, 11.786394321941378) 3 19.0 1002.85899476 (11.61852219546234, 11)
loss 610.3782958984375
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 0 21.0 982.049698353 (11.469111876584304, 9)
loss 1651.429931640625
Current State,action,reward,Response time,Next State:  (9, 11.469111876584304) 0 23.0 1023.46894667 (11.336751742492702, 7)
loss 669.49462890625
Current State,action,reward,Response time,Next State:  (7, 11.336751742492702) 0 25.0 1082.29845875 (11.25610796929319, 5)
loss 891.6809692382812
Current State,action,reward,Response time,Next State:  (5, 11.25610796929319) 3 24.0 1291.80100003 (11.027107764209074, 6)
loss 429.89813232421875
Current State,action,reward,Response time,Next State:  (6, 11.027107764209074) 3 23.0 1126.72191929 (10.995673623987257, 7)
loss 903.5448608398438
Current State,action,reward,Response time,Next State:  (7, 10.995673623987257) 0 25.0 1061.04358539 (10.931193889570471, 5)
loss 1857.753662109375
Current State,action,reward,Response time,Next State:  (5, 10.931193889570471) 3 24.0 1261.94717276 (10.816918347608043, 6)
loss 780.2826538085938
Current State,action,reward,Response time,Next State:  (6, 10.816918347608043) 3 23.0 1112.82017919 (10.819208572963639, 7)
loss 607.265625
Current State,action,reward,Response time,Next State:  (7, 10.819208572963639) 2 23.0 1050.04686027 (10.768325938188134, 7)
loss 2129.011962890625
Current State,action,reward,Response time,Next State:  (7, 10.768325938188134) 3 22.0 1046.87602081 (10.772009508959538, 8)
loss 630.8474731445312
Current State,action,reward,Response time,Next State:  (8, 10.772009508959538) 2 22.0 1000.61136749 (10.644925616761762, 8)
loss 515.8629150390625
Current State,action,reward,Response time,Next State:  (8, 10.644925616761762) 4 20.0 993.183975462 (10.58735855349979, 10)
loss 1583.4873046875
Current State,action,reward,Response time,Next State:  (10, 10.58735855349979) 3 19.0 939.257231149 (10.552868829802469, 11)
loss 933.3529663085938
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 4 19.0 925.738299342 (10.553846649940214, 11)
loss 1305.45068359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 1586.7535400390625
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 2 19.0 922.369964659 (10.448897752470936, 11)
loss 1874.1279296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 2546.35205078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 2181.498291015625
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 2 19.0 919.871906942 (10.370942817486826, 11)
loss 1548.312744140625
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 4 19.0 916.124940439 (10.42733414151318, 11)
loss 423.5954284667969
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 2 19.0 919.104778396 (10.388469398680568, 11)
loss 492.90045166015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 1525.9296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 867.961669921875
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 0 21.0 913.381595845 (10.30224719189987, 9)
loss 695.9541015625
Current State,action,reward,Response time,Next State:  (9, 10.30224719189987) 1 22.0 962.389338906 (10.278181486298042, 8)
loss 427.7015686035156
Current State,action,reward,Response time,Next State:  (8, 10.278181486298042) 3 21.0 971.749689939 (10.268274366284802, 9)
loss 846.3202514648438
Current State,action,reward,Response time,Next State:  (9, 10.268274366284802) 0 23.0 960.611029141 (10.335411397720526, 7)
loss 1017.1122436523438
Current State,action,reward,Response time,Next State:  (7, 10.335411397720526) 3 22.0 1019.89820185 (10.305649118067803, 8)
loss 667.0783081054688
Current State,action,reward,Response time,Next State:  (8, 10.305649118067803) 4 20.0 973.355030047 (10.24826025489064, 10)
loss 1574.7095947265625
Current State,action,reward,Response time,Next State:  (10, 10.24826025489064) 3 19.0 921.2700698 (10.276491935146446, 11)
loss 515.0291137695312
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 4 19.0 911.133954163 (10.236991269871366, 11)
loss 641.690673828125
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 4 19.0 909.046654676 (10.236272697871373, 11)
loss 1906.39208984375
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 1 20.0 909.008683798 (10.369891240151098, 10)
loss 813.6160888671875
Action +2 not possible so Scaled up by 1
Current State,action,reward,Response time,Next State:  (10, 10.369891240151098) 4 19.0 927.721874973 (10.316955310454549, 11)
loss 814.913330078125
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 2 19.0 913.272125304 (10.333617326102203, 11)
loss 420.7582702636719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 1062.05712890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 2277.115966796875
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 1 20.0 919.032945938 (10.546025383098053, 10)
loss 627.722900390625
Current State,action,reward,Response time,Next State:  (10, 10.546025383098053) 3 19.0 937.064750655 (10.655373370049301, 11)
loss 640.748046875
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 1 20.0 931.154858096 (10.624473674922116, 10)
loss 529.4992065429688
Current State,action,reward,Response time,Next State:  (10, 10.624473674922116) 3 19.0 941.225969064 (10.771376986314287, 11)
loss 613.7145385742188
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 419.4894714355469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 1508.4329833984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 2038.7520751953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 603.240966796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 1424.802490234375
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 0 21.0 992.681522335 (12.19918626616789, 9)
loss 540.3838500976562
Current State,action,reward,Response time,Next State:  (9, 12.19918626616789) 3 20.0 1061.68473805 (12.501496275411796, 10)
loss 2451.00390625
Current State,action,reward,Response time,Next State:  (10, 12.501496275411796) 3 19.0 1040.79092857 (13.168618569876575, 11)
loss 2236.93994140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 432.1253662109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 600.0704345703125
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 2 19.0 1122.88439768 (14.677479537099185, 11)
loss 1800.5625
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 0 21.0 1143.69153516 (15.353965082180355, 9)
loss 441.649169921875
Current State,action,reward,Response time,Next State:  (9, 15.353965082180355) 3 20.0 1226.82184023 (15.836943704090487, 10)
loss 432.96417236328125
Current State,action,reward,Response time,Next State:  (10, 15.836943704090487) 3 19.0 1217.71670884 (16.466876895473597, 11)
loss 1782.9608154296875
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 0 21.0 1238.24711194 (16.871606159345866, 9)
loss 462.6242980957031
Current State,action,reward,Response time,Next State:  (9, 16.871606159345866) 3 20.0 1306.26286107 (17.534967586021782, 10)
loss 1969.8985595703125
Current State,action,reward,Response time,Next State:  (10, 17.534967586021782) 3 19.0 1307.78684385 (17.669285735563751, 11)
loss 1610.6119384765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 1156.835693359375
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 1 20.0 1316.32685758 (18.385807405229915, 10)
loss 1572.0399169921875
Current State,action,reward,Response time,Next State:  (10, 18.385807405229915) 3 19.0 1352.9188695 (18.671267839956315, 11)
loss 791.94921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 494.67681884765625
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 1 20.0 1373.60319427 (19.286321916040979, 10)
loss 1224.903076171875
Current State,action,reward,Response time,Next State:  (10, 19.286321916040979) 2 20.0 1400.68584406 (19.340464848017284, 10)
loss 1056.8369140625
Current State,action,reward,Response time,Next State:  (10, 19.340464848017284) 3 19.0 1403.55780672 (19.213467265587269, 11)
loss 1834.6854248046875
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 1 20.0 1383.38281107 (19.140765783401285, 10)
loss 707.7079467773438
Current State,action,reward,Response time,Next State:  (10, 19.140765783401285) 0 22.0 1392.96495117 (19.385636054792762, 8)
loss 816.3812255859375
Current State,action,reward,Response time,Next State:  (8, 19.385636054792762) 3 21.0 1504.03300517 (19.223969507401588, 9)
loss 877.2918090820312
Current State,action,reward,Response time,Next State:  (9, 19.223969507401588) 2 21.0 1429.39747342 (19.25591252280865, 9)
loss 1587.7235107421875
Current State,action,reward,Response time,Next State:  (9, 19.25591252280865) 3 20.0 1431.06953264 (19.08360399753829, 10)
loss 535.5799560546875
Current State,action,reward,Response time,Next State:  (10, 19.08360399753829) 0 22.0 1389.93285614 (18.668181536495972, 8)
loss 425.15020751953125
Current State,action,reward,Response time,Next State:  (8, 18.668181536495972) 3 21.0 1462.10152292 (18.375894992990247, 9)
loss 3771.189697265625
Current State,action,reward,Response time,Next State:  (9, 18.375894992990247) 4 19.0 1385.00495784 (17.82724819986867, 11)
loss 1908.9422607421875
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 2 19.0 1310.13203606 (17.229782241685768, 11)
loss 609.1019897460938
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 839.8087158203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 437.85296630859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 844.8465576171875
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 1 20.0 1210.97093797 (15.828704162850809, 10)
loss 628.7877197265625
Current State,action,reward,Response time,Next State:  (10, 15.828704162850809) 2 20.0 1217.27964986 (15.550833128512703, 10)
loss 2358.076904296875
Current State,action,reward,Response time,Next State:  (10, 15.550833128512703) 0 22.0 1202.54023315 (15.446694946204717, 8)
loss 887.260498046875
Current State,action,reward,Response time,Next State:  (8, 15.446694946204717) 2 22.0 1273.82239956 (15.750501603468638, 8)
loss 1332.5684814453125
Current State,action,reward,Response time,Next State:  (8, 15.750501603468638) 3 21.0 1291.57831736 (15.817158911312735, 9)
loss 424.1610412597656
Current State,action,reward,Response time,Next State:  (9, 15.817158911312735) 2 21.0 1251.06775133 (15.829956988360925, 9)
loss 549.8984375
Current State,action,reward,Response time,Next State:  (9, 15.829956988360925) 3 20.0 1251.7376675 (15.892373986997768, 10)
loss 1713.6334228515625
Current State,action,reward,Response time,Next State:  (10, 15.892373986997768) 3 19.0 1220.65695786 (15.954793861767499, 11)
loss 2399.352783203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 1721.7357177734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 703.450927734375
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 1 20.0 1214.51137704 (15.947547279389703, 10)
loss 834.8782348632812
Current State,action,reward,Response time,Next State:  (10, 15.947547279389703) 3 19.0 1223.58357506 (16.11465619633363, 11)
loss 1420.7750244140625
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 1 20.0 1219.63501821 (16.147078378791146, 10)
loss 1849.96630859375
Current State,action,reward,Response time,Next State:  (10, 16.147078378791146) 2 20.0 1234.16752106 (16.229253414601111, 10)
loss 616.3038940429688
Current State,action,reward,Response time,Next State:  (10, 16.229253414601111) 3 19.0 1238.52642122 (16.295120821876548, 11)
loss 1968.1129150390625
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 1 20.0 1229.17115431 (16.667936385136993, 10)
loss 1613.0938720703125
Current State,action,reward,Response time,Next State:  (10, 16.667936385136993) 3 19.0 1261.79596106 (16.836383524612351, 11)
loss 2073.899169921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 2430.88134765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 2027.9471435546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 1372.7052001953125
############ Running episode number: 44  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 1 21.0 1029.06659875 (11.973514343585284, 9)
loss 2173.96142578125
Current State,action,reward,Response time,Next State:  (9, 11.973514343585284) 3 20.0 1049.87192659 (11.786394321941378, 10)
loss 432.3873596191406
Current State,action,reward,Response time,Next State:  (10, 11.786394321941378) 0 22.0 1002.85899476 (11.61852219546234, 8)
loss 435.04449462890625
Current State,action,reward,Response time,Next State:  (8, 11.61852219546234) 3 21.0 1050.08562792 (11.469111876584304, 9)
loss 1391.01416015625
Current State,action,reward,Response time,Next State:  (9, 11.469111876584304) 3 20.0 1023.46894667 (11.336751742492702, 10)
loss 1462.023681640625
Current State,action,reward,Response time,Next State:  (10, 11.336751742492702) 1 21.0 979.00811241 (11.25610796929319, 9)
loss 971.5552978515625
Current State,action,reward,Response time,Next State:  (9, 11.25610796929319) 3 20.0 1012.3192433 (11.027107764209074, 10)
loss 1545.268798828125
Current State,action,reward,Response time,Next State:  (10, 11.027107764209074) 3 19.0 962.583328739 (10.995673623987257, 11)
loss 2919.404541015625
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 2 19.0 949.137050055 (10.931193889570471, 11)
loss 2111.09423828125
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 2 19.0 945.729803224 (10.816918347608043, 11)
loss 442.44683837890625
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 2 19.0 939.691239608 (10.819208572963639, 11)
loss 825.8064575195312
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 0 21.0 939.812260006 (10.768325938188134, 9)
loss 1335.3406982421875
Current State,action,reward,Response time,Next State:  (9, 10.768325938188134) 3 20.0 986.786261176 (10.772009508959538, 10)
loss 1049.6090087890625
Current State,action,reward,Response time,Next State:  (10, 10.772009508959538) 0 22.0 949.051873418 (10.644925616761762, 8)
loss 2730.568115234375
Current State,action,reward,Response time,Next State:  (8, 10.644925616761762) 0 24.0 993.183975462 (10.58735855349979, 6)
loss 1215.6680908203125
Current State,action,reward,Response time,Next State:  (6, 10.58735855349979) 4 22.0 1097.63729963 (10.552868829802469, 8)
loss 602.1082763671875
Current State,action,reward,Response time,Next State:  (8, 10.552868829802469) 4 20.0 987.80373542 (10.553846649940214, 10)
loss 1026.324951171875
Current State,action,reward,Response time,Next State:  (10, 10.553846649940214) 2 20.0 937.479622653 (10.489125480251131, 10)
loss 1225.9842529296875
Current State,action,reward,Response time,Next State:  (10, 10.489125480251131) 3 19.0 934.046546974 (10.448897752470936, 11)
loss 620.980712890625
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 2 19.0 920.244245637 (10.433149880183072, 11)
loss 1604.58447265625
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 0 21.0 919.412094444 (10.44185150623065, 9)
loss 1553.789306640625
Current State,action,reward,Response time,Next State:  (9, 10.44185150623065) 3 20.0 969.696935814 (10.370942817486826, 10)
loss 966.728271484375
Action +2 not possible so Scaled up by 1
Current State,action,reward,Response time,Next State:  (10, 10.370942817486826) 4 19.0 927.777654938 (10.42733414151318, 11)
loss 916.0977172851562
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 0 21.0 919.104778396 (10.388469398680568, 9)
loss 794.7066650390625
Current State,action,reward,Response time,Next State:  (9, 10.388469398680568) 2 21.0 966.902645924 (10.344006106602812, 9)
loss 1722.9130859375
Current State,action,reward,Response time,Next State:  (9, 10.344006106602812) 3 20.0 964.575212011 (10.319026962956018, 10)
loss 635.700439453125
Action +2 not possible so Scaled up by 1
Current State,action,reward,Response time,Next State:  (10, 10.319026962956018) 4 19.0 925.023825574 (10.30224719189987, 11)
loss 681.0963134765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 582.3886108398438
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 1600.187744140625
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 4 19.0 910.69972028 (10.335411397720526, 11)
loss 880.0322875976562
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 2 19.0 914.247384359 (10.305649118067803, 11)
loss 456.33892822265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 614.8790893554688
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 1 20.0 909.642131904 (10.276491935146446, 10)
loss 613.6709594726562
Current State,action,reward,Response time,Next State:  (10, 10.276491935146446) 2 20.0 922.767593645 (10.236991269871366, 10)
loss 1395.6929931640625
Current State,action,reward,Response time,Next State:  (10, 10.236991269871366) 3 19.0 920.672316722 (10.236272697871373, 11)
loss 422.3908996582031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 1306.96337890625
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 4 19.0 916.069372847 (10.316955310454549, 11)
loss 1074.298095703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 696.8766479492188
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 784.5260620117188
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 1962.1800537109375
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 4 19.0 919.032945938 (10.546025383098053, 11)
loss 640.5922241210938
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 1451.6778564453125
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 1 20.0 931.154858096 (10.624473674922116, 10)
loss 633.9887084960938
Action +2 not possible so Scaled up by 1
Current State,action,reward,Response time,Next State:  (10, 10.624473674922116) 4 19.0 941.225969064 (10.771376986314287, 11)
loss 2878.073974609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 606.9406127929688
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 843.8738403320312
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 2 19.0 951.466016946 (11.271571944085663, 11)
loss 406.36669921875
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 2 19.0 963.716106332 (11.670334358779868, 11)
loss 1942.8319091796875
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 4 19.0 984.787563682 (11.819721938468785, 11)
loss 3639.941650390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 1273.71044921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 1657.02197265625
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 4 19.0 1028.70793389 (13.168618569876575, 11)
loss 1630.5240478515625
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 4 19.0 1063.96010023 (13.649658108197247, 11)
loss 762.5655517578125
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 1 20.0 1089.37925646 (14.283719188889453, 10)
loss 1849.971923828125
Action +2 not possible so Scaled up by 1
Current State,action,reward,Response time,Next State:  (10, 14.283719188889453) 4 19.0 1135.32732476 (14.677479537099185, 11)
loss 2759.362060546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 542.6073608398438
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 2 19.0 1179.43847566 (15.836943704090487, 11)
loss 541.4008178710938
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 1623.3865966796875
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 4 19.0 1238.24711194 (16.871606159345866, 11)
loss 1474.1094970703125
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 4 19.0 1259.63387034 (17.534967586021782, 11)
loss 1753.3328857421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 1583.3570556640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 636.1961059570312
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 428.1554870605469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 411.18511962890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 624.2384643554688
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 0 21.0 1373.60319427 (19.286321916040979, 9)
loss 446.8600158691406
Current State,action,reward,Response time,Next State:  (9, 19.286321916040979) 4 19.0 1432.66131431 (19.340464848017284, 11)
loss 416.0489501953125
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 4 19.0 1390.09363446 (19.213467265587269, 11)
loss 1787.0880126953125
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 4 19.0 1383.38281107 (19.140765783401285, 11)
loss 426.6398010253906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 2361.8125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 708.8095703125
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 2 19.0 1383.93777195 (19.25591252280865, 11)
loss 460.83770751953125
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 4 19.0 1385.62570908 (19.08360399753829, 11)
loss 2211.44970703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 608.2606201171875
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 4 19.0 1354.56874896 (18.375894992990247, 11)
loss 432.0448913574219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 418.3486328125
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 2 19.0 1310.13203606 (17.229782241685768, 11)
loss 622.0284423828125
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 0 21.0 1278.56065924 (16.84211602880065, 9)
loss 414.611572265625
Current State,action,reward,Response time,Next State:  (9, 16.84211602880065) 4 19.0 1304.71919827 (16.237094554670044, 11)
loss 419.11761474609375
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 1 20.0 1226.10492247 (15.950694610794756, 10)
loss 782.8544311523438
Current State,action,reward,Response time,Next State:  (10, 15.950694610794756) 3 19.0 1223.7505224 (15.828704162850809, 11)
loss 617.4974975585938
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 1513.338134765625
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 2 19.0 1189.84140354 (15.446694946204717, 11)
loss 418.88909912109375
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 1 20.0 1184.33851965 (15.750501603468638, 10)
loss 461.91314697265625
Current State,action,reward,Response time,Next State:  (10, 15.750501603468638) 2 20.0 1213.1314661 (15.817158911312735, 10)
loss 434.3459167480469
Current State,action,reward,Response time,Next State:  (10, 15.817158911312735) 3 19.0 1216.66724247 (15.829956988360925, 11)
loss 418.21575927734375
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 4 19.0 1204.59090422 (15.892373986997768, 11)
loss 437.989501953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 428.4676818847656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 512.8082885742188
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 0 21.0 1213.81868812 (16.017694914042416, 9)
loss 420.26629638671875
Current State,action,reward,Response time,Next State:  (9, 16.017694914042416) 3 20.0 1261.56482143 (15.947547279389703, 10)
loss 408.4875793457031
Current State,action,reward,Response time,Next State:  (10, 15.947547279389703) 2 20.0 1223.58357506 (16.11465619633363, 10)
loss 705.990966796875
Current State,action,reward,Response time,Next State:  (10, 16.11465619633363) 0 22.0 1232.44771583 (16.147078378791146, 8)
loss 559.4774780273438
Current State,action,reward,Response time,Next State:  (8, 16.147078378791146) 1 23.0 1314.7561657 (16.229253414601111, 7)
loss 431.6165771484375
Current State,action,reward,Response time,Next State:  (7, 16.229253414601111) 3 22.0 1387.18316937 (16.295120821876548, 8)
loss 423.7150573730469
Current State,action,reward,Response time,Next State:  (8, 16.295120821876548) 4 20.0 1323.40847593 (16.667936385136993, 10)
loss 424.9766540527344
Current State,action,reward,Response time,Next State:  (10, 16.667936385136993) 3 19.0 1261.79596106 (16.836383524612351, 11)
loss 426.941162109375
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 2 19.0 1257.77263112 (16.845818065953559, 11)
loss 1385.1695556640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 554.5377197265625
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 2 19.0 1269.21706044 (17.215992726625572, 11)
loss 594.355224609375
############ Running episode number: 45  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 407.02593994140625
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 4 19.0 1000.80824137 (11.786394321941378, 11)
loss 409.3383483886719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 529.6690063476562
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 2 19.0 982.049698353 (11.469111876584304, 11)
loss 766.1687622070312
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 405.9361267089844
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 2 19.0 967.160346038 (11.25610796929319, 11)
loss 424.60858154296875
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 2 19.0 962.898956888 (11.027107764209074, 11)
loss 1358.408203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 434.43634033203125
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 1 20.0 949.137050055 (10.931193889570471, 10)
loss 430.1722106933594
Current State,action,reward,Response time,Next State:  (10, 10.931193889570471) 0 22.0 957.495664348 (10.816918347608043, 8)
loss 1377.26171875
Current State,action,reward,Response time,Next State:  (8, 10.816918347608043) 3 21.0 1003.23605536 (10.819208572963639, 9)
loss 782.0311279296875
Current State,action,reward,Response time,Next State:  (9, 10.819208572963639) 3 20.0 989.449716 (10.768325938188134, 10)
loss 656.1549682617188
Action +2 not possible so Scaled up by 1
Current State,action,reward,Response time,Next State:  (10, 10.768325938188134) 4 19.0 948.856481751 (10.772009508959538, 11)
loss 450.9430236816406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 613.27587890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 1385.363525390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 559.843994140625
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 0 21.0 925.738299342 (10.553846649940214, 9)
loss 596.4609375
Current State,action,reward,Response time,Next State:  (9, 10.553846649940214) 4 19.0 975.559328891 (10.489125480251131, 11)
loss 417.8855895996094
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 4 19.0 922.369964659 (10.448897752470936, 11)
loss 1396.953369140625
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 0 21.0 920.244245637 (10.433149880183072, 9)
loss 624.8320922851562
Current State,action,reward,Response time,Next State:  (9, 10.433149880183072) 3 20.0 969.241448633 (10.44185150623065, 10)
loss 943.3914184570312
Action +2 not possible so Scaled up by 1
Current State,action,reward,Response time,Next State:  (10, 10.44185150623065) 4 19.0 931.538941947 (10.370942817486826, 11)
loss 1266.2261962890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 1377.7879638671875
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 4 19.0 919.104778396 (10.388469398680568, 11)
loss 551.6023559570312
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 4 19.0 917.051082408 (10.344006106602812, 11)
loss 426.3240051269531
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 1 20.0 914.701547126 (10.319026962956018, 10)
loss 418.4803161621094
Action +2 not possible so Scaled up by 1
Current State,action,reward,Response time,Next State:  (10, 10.319026962956018) 4 19.0 925.023825574 (10.30224719189987, 11)
loss 543.741455078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 424.5955505371094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 1273.778564453125
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 2 19.0 910.69972028 (10.335411397720526, 11)
loss 418.51776123046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 413.4195556640625
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 4 19.0 912.67468196 (10.24826025489064, 11)
loss 622.5116577148438
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 447.8623962402344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 552.315673828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 1269.4810791015625
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 1 20.0 909.008683798 (10.369891240151098, 10)
loss 404.8768005371094
Current State,action,reward,Response time,Next State:  (10, 10.369891240151098) 3 19.0 927.721874973 (10.316955310454549, 11)
loss 534.1342163085938
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 427.96636962890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 429.158203125
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 1 20.0 917.140709305 (10.425974763084863, 10)
loss 637.9498901367188
Current State,action,reward,Response time,Next State:  (10, 10.425974763084863) 3 19.0 930.696774523 (10.546025383098053, 11)
loss 509.8648986816406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 431.05889892578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 416.55596923828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 541.8541870117188
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 580.2689208984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 614.7315063476562
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 531.84521484375
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 1 20.0 963.716106332 (11.670334358779868, 10)
loss 713.9391479492188
Current State,action,reward,Response time,Next State:  (10, 11.670334358779868) 1 21.0 996.702699398 (11.819721938468785, 9)
loss 432.62408447265625
Current State,action,reward,Response time,Next State:  (9, 11.819721938468785) 3 20.0 1041.82165315 (12.19918626616789, 10)
loss 432.7212219238281
Current State,action,reward,Response time,Next State:  (10, 12.19918626616789) 3 19.0 1024.75516863 (12.501496275411796, 11)
loss 1381.39404296875
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 1 20.0 1028.70793389 (13.168618569876575, 10)
loss 404.0318298339844
Current State,action,reward,Response time,Next State:  (10, 13.168618569876575) 3 19.0 1076.17782493 (13.649658108197247, 11)
loss 556.7247924804688
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 426.8479919433594
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 2 19.0 1122.88439768 (14.677479537099185, 11)
loss 421.4690246582031
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 1 20.0 1143.69153516 (15.353965082180355, 10)
loss 612.5263671875
Current State,action,reward,Response time,Next State:  (10, 15.353965082180355) 1 21.0 1192.09754638 (15.836943704090487, 9)
loss 514.5374755859375
Current State,action,reward,Response time,Next State:  (9, 15.836943704090487) 0 23.0 1252.10338759 (16.466876895473597, 7)
loss 420.34283447265625
Current State,action,reward,Response time,Next State:  (7, 16.466876895473597) 3 22.0 1401.99108791 (16.871606159345866, 8)
loss 524.554931640625
Current State,action,reward,Response time,Next State:  (8, 16.871606159345866) 0 24.0 1357.1010433 (17.534967586021782, 6)
loss 424.97845458984375
Current State,action,reward,Response time,Next State:  (6, 17.534967586021782) 3 23.0 1557.14594988 (17.669285735563751, 7)
loss 453.8053894042969
Current State,action,reward,Response time,Next State:  (7, 17.669285735563751) 2 23.0 1476.92127615 (17.944480812078613, 7)
loss 606.128173828125
Current State,action,reward,Response time,Next State:  (7, 17.944480812078613) 3 22.0 1494.0705337 (18.385807405229915, 8)
loss 531.6129760742188
Current State,action,reward,Response time,Next State:  (8, 18.385807405229915) 3 21.0 1445.59822471 (18.671267839956315, 9)
loss 427.9967346191406
Current State,action,reward,Response time,Next State:  (9, 18.671267839956315) 3 20.0 1400.46626871 (19.02839494033929, 10)
loss 414.36199951171875
Current State,action,reward,Response time,Next State:  (10, 19.02839494033929) 1 21.0 1387.00434183 (19.286321916040979, 9)
loss 1367.0206298828125
Current State,action,reward,Response time,Next State:  (9, 19.286321916040979) 2 21.0 1432.66131431 (19.340464848017284, 9)
loss 429.1001281738281
Current State,action,reward,Response time,Next State:  (9, 19.340464848017284) 3 20.0 1435.4954296 (19.213467265587269, 10)
loss 729.6063232421875
Current State,action,reward,Response time,Next State:  (10, 19.213467265587269) 3 19.0 1396.82133527 (19.140765783401285, 11)
loss 814.8981323242188
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 1417.3453369140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 421.4450988769531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 425.1710510253906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 429.9239196777344
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 0 21.0 1376.52055872 (18.668181536495972, 9)
loss 644.6958618164062
Current State,action,reward,Response time,Next State:  (9, 18.668181536495972) 3 20.0 1400.30471596 (18.375894992990247, 10)
loss 424.90252685546875
Current State,action,reward,Response time,Next State:  (10, 18.375894992990247) 3 19.0 1352.39307459 (17.82724819986867, 11)
loss 415.2070617675781
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 1 20.0 1310.13203606 (17.229782241685768, 10)
loss 555.4797973632812
Current State,action,reward,Response time,Next State:  (10, 17.229782241685768) 1 21.0 1291.59856437 (16.84211602880065, 9)
loss 1675.62744140625
Current State,action,reward,Response time,Next State:  (9, 16.84211602880065) 3 20.0 1304.71919827 (16.237094554670044, 10)
loss 613.4750366210938
Current State,action,reward,Response time,Next State:  (10, 16.237094554670044) 2 20.0 1238.94234737 (15.950694610794756, 10)
loss 1378.365966796875
Current State,action,reward,Response time,Next State:  (10, 15.950694610794756) 3 19.0 1223.7505224 (15.828704162850809, 11)
loss 578.354248046875
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 2 19.0 1204.52470225 (15.550833128512703, 11)
loss 541.7180786132812
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 2 19.0 1189.84140354 (15.446694946204717, 11)
loss 427.9230651855469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 412.6136169433594
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 2 19.0 1200.39231205 (15.817158911312735, 11)
loss 427.4215087890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 1399.9691162109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 416.50177001953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 416.477783203125
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 1 20.0 1211.18755114 (16.004586266677634, 10)
loss 397.2940673828125
Current State,action,reward,Response time,Next State:  (10, 16.004586266677634) 3 19.0 1226.60915635 (16.017694914042416, 11)
loss 428.0378723144531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 609.5703735351562
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 426.489990234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 460.53350830078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 2412.528564453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 1613.567626953125
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 0 21.0 1229.17115431 (16.667936385136993, 9)
loss 1621.1337890625
Current State,action,reward,Response time,Next State:  (9, 16.667936385136993) 3 20.0 1295.6017535 (16.836383524612351, 10)
loss 531.2015991210938
Current State,action,reward,Response time,Next State:  (10, 16.836383524612351) 3 19.0 1270.73108663 (16.845818065953559, 11)
loss 626.0790405273438
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 4 19.0 1258.27117243 (17.052961248403161, 11)
loss 462.8563232421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 410.10546875
############ Running episode number: 46  ##############
Action +2 not possible so Scaled up by 1
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 4 19.0 1029.06659875 (11.973514343585284, 11)
loss 423.15936279296875
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 4 19.0 1000.80824137 (11.786394321941378, 11)
loss 609.6194458007812
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 421.29156494140625
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 2 19.0 982.049698353 (11.469111876584304, 11)
loss 2229.673828125
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 0 21.0 974.154538113 (11.336751742492702, 9)
loss 610.4968872070312
Current State,action,reward,Response time,Next State:  (9, 11.336751742492702) 4 19.0 1016.54054685 (11.25610796929319, 11)
loss 1404.4189453125
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 0 21.0 962.898956888 (11.027107764209074, 9)
loss 542.2691650390625
Current State,action,reward,Response time,Next State:  (9, 11.027107764209074) 3 20.0 1000.33221268 (10.995673623987257, 10)
loss 1382.08740234375
Action +2 not possible so Scaled up by 1
Current State,action,reward,Response time,Next State:  (10, 10.995673623987257) 4 19.0 960.915933313 (10.931193889570471, 11)
loss 588.0267944335938
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 1437.8980712890625
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 1 20.0 939.691239608 (10.819208572963639, 10)
loss 800.6889038085938
Current State,action,reward,Response time,Next State:  (10, 10.819208572963639) 2 20.0 951.555504911 (10.768325938188134, 10)
loss 551.2426147460938
Current State,action,reward,Response time,Next State:  (10, 10.768325938188134) 3 19.0 948.856481751 (10.772009508959538, 11)
loss 417.513427734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 1380.063720703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 535.0037231445312
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 1483.1856689453125
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 1 20.0 925.738299342 (10.553846649940214, 10)
loss 606.6080932617188
Current State,action,reward,Response time,Next State:  (10, 10.553846649940214) 1 21.0 937.479622653 (10.489125480251131, 9)
loss 413.2664794921875
Current State,action,reward,Response time,Next State:  (9, 10.489125480251131) 2 21.0 972.171495057 (10.448897752470936, 9)
loss 408.1126403808594
Current State,action,reward,Response time,Next State:  (9, 10.448897752470936) 3 20.0 970.065772031 (10.433149880183072, 10)
loss 600.6100463867188
Current State,action,reward,Response time,Next State:  (10, 10.433149880183072) 3 19.0 931.077372094 (10.44185150623065, 11)
loss 423.3218688964844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 432.1153259277344
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 1 20.0 916.124940439 (10.42733414151318, 10)
loss 422.0296630859375
Current State,action,reward,Response time,Next State:  (10, 10.42733414151318) 3 19.0 930.768881517 (10.388469398680568, 11)
loss 873.9976196289062
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 4 19.0 917.051082408 (10.344006106602812, 11)
loss 409.94537353515625
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 2 19.0 914.701547126 (10.319026962956018, 11)
loss 2224.483642578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 423.8353271484375
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 1 20.0 912.494916918 (10.278181486298042, 10)
loss 424.8251647949219
Current State,action,reward,Response time,Next State:  (10, 10.278181486298042) 3 19.0 922.857214352 (10.268274366284802, 11)
loss 1271.162841796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 1276.8026123046875
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 0 21.0 914.247384359 (10.305649118067803, 9)
loss 527.12548828125
Current State,action,reward,Response time,Next State:  (9, 10.305649118067803) 1 22.0 962.567412952 (10.24826025489064, 8)
loss 613.2853393554688
Current State,action,reward,Response time,Next State:  (8, 10.24826025489064) 3 21.0 970.000949704 (10.276491935146446, 9)
loss 405.86431884765625
Current State,action,reward,Response time,Next State:  (9, 10.276491935146446) 0 23.0 961.041178317 (10.236991269871366, 7)
loss 640.913818359375
Current State,action,reward,Response time,Next State:  (7, 10.236991269871366) 3 22.0 1013.76498121 (10.236272697871373, 8)
loss 423.5451965332031
Current State,action,reward,Response time,Next State:  (8, 10.236272697871373) 3 21.0 969.300339391 (10.369891240151098, 9)
loss 552.5301513671875
Current State,action,reward,Response time,Next State:  (9, 10.369891240151098) 3 20.0 965.930171009 (10.316955310454549, 10)
loss 464.26568603515625
Current State,action,reward,Response time,Next State:  (10, 10.316955310454549) 3 19.0 924.913936648 (10.333617326102203, 11)
loss 614.32470703125
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 0 21.0 914.152581784 (10.390165524255663, 9)
loss 628.1397094726562
Current State,action,reward,Response time,Next State:  (9, 10.390165524255663) 3 20.0 966.991429728 (10.425974763084863, 10)
loss 418.7598571777344
Current State,action,reward,Response time,Next State:  (10, 10.425974763084863) 0 22.0 930.696774523 (10.546025383098053, 8)
loss 1268.0091552734375
Current State,action,reward,Response time,Next State:  (8, 10.546025383098053) 4 20.0 987.40377158 (10.655373370049301, 10)
loss 1434.76953125
Action +2 not possible so Scaled up by 1
Current State,action,reward,Response time,Next State:  (10, 10.655373370049301) 4 19.0 942.865015335 (10.624473674922116, 11)
loss 1469.719482421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 700.0712890625
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 4 19.0 937.284736847 (10.924797168745895, 11)
loss 513.7651977539062
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 1566.8753662109375
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 2 19.0 951.466016946 (11.271571944085663, 11)
loss 450.7246398925781
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 2 19.0 963.716106332 (11.670334358779868, 11)
loss 1659.290283203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 1268.7032470703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 412.9107666015625
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 0 21.0 1012.73322757 (12.501496275411796, 9)
loss 426.2727355957031
Current State,action,reward,Response time,Next State:  (9, 12.501496275411796) 3 20.0 1077.50917513 (13.168618569876575, 10)
loss 420.795166015625
Action +2 not possible so Scaled up by 1
Current State,action,reward,Response time,Next State:  (10, 13.168618569876575) 4 19.0 1076.17782493 (13.649658108197247, 11)
loss 552.5216064453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 1365.3466796875
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 4 19.0 1122.88439768 (14.677479537099185, 11)
loss 1479.3681640625
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 1 20.0 1143.69153516 (15.353965082180355, 10)
loss 620.6677856445312
Current State,action,reward,Response time,Next State:  (10, 15.353965082180355) 2 20.0 1192.09754638 (15.836943704090487, 10)
loss 1567.0897216796875
Current State,action,reward,Response time,Next State:  (10, 15.836943704090487) 3 19.0 1217.71670884 (16.466876895473597, 11)
loss 423.5416259765625
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 0 21.0 1238.24711194 (16.871606159345866, 9)
loss 1374.634033203125
Current State,action,reward,Response time,Next State:  (9, 16.871606159345866) 1 22.0 1306.26286107 (17.534967586021782, 8)
loss 599.4218139648438
Current State,action,reward,Response time,Next State:  (8, 17.534967586021782) 3 21.0 1395.8710659 (17.669285735563751, 9)
loss 432.4626770019531
Current State,action,reward,Response time,Next State:  (9, 17.669285735563751) 2 21.0 1348.01745033 (17.944480812078613, 9)
loss 624.1199951171875
Current State,action,reward,Response time,Next State:  (9, 17.944480812078613) 0 23.0 1362.4225545 (18.385807405229915, 7)
loss 561.9763793945312
Current State,action,reward,Response time,Next State:  (7, 18.385807405229915) 0 25.0 1521.57256429 (18.671267839956315, 5)
loss 525.9144897460938
Current State,action,reward,Response time,Next State:  (5, 18.671267839956315) 0 -137.276115284 1973.12235802 (19.02839494033929, 3)
loss 428.1051940917969
Current State,action,reward,Response time,Next State:  (3, 19.02839494033929) 3 -11.6080706615 3642.76115284 (19.286321916040979, 4)
loss 638.49267578125
Current State,action,reward,Response time,Next State:  (4, 19.286321916040979) 2 -12.2231581375 2376.08070662 (19.340464848017284, 4)
loss 424.0931701660156
Current State,action,reward,Response time,Next State:  (4, 19.340464848017284) 3 22.7059154527 2382.23158138 (19.213467265587269, 5)
loss 593.31005859375
Current State,action,reward,Response time,Next State:  (5, 19.213467265587269) 0 -139.486126476 2022.94084547 (19.140765783401285, 3)
loss 1374.6768798828125
Current State,action,reward,Response time,Next State:  (3, 19.140765783401285) 3 -12.7363228767 3664.86126476 (19.385636054792762, 4)
loss 1366.9918212890625
Current State,action,reward,Response time,Next State:  (4, 19.385636054792762) 3 22.6094185224 2387.36322877 (19.223969507401588, 5)
loss 1648.9461669921875
Current State,action,reward,Response time,Next State:  (5, 19.223969507401588) 3 24.0 2023.90581478 (19.25591252280865, 6)
loss 413.403076171875
Current State,action,reward,Response time,Next State:  (6, 19.25591252280865) 3 23.0 1670.96770947 (19.08360399753829, 7)
loss 420.7586669921875
Current State,action,reward,Response time,Next State:  (7, 19.08360399753829) 0 25.0 1565.05696683 (18.668181536495972, 5)
loss 455.7314453125
Current State,action,reward,Response time,Next State:  (5, 18.668181536495972) 2 25.0 1972.8387816 (18.375894992990247, 5)
loss 1264.4456787109375
Current State,action,reward,Response time,Next State:  (5, 18.375894992990247) 3 24.0 1945.98284482 (17.82724819986867, 6)
loss 418.5956726074219
Current State,action,reward,Response time,Next State:  (6, 17.82724819986867) 1 25.0 1576.47712838 (17.229782241685768, 5)
loss 606.8087158203125
Current State,action,reward,Response time,Next State:  (5, 17.229782241685768) 3 24.0 1840.67545971 (16.84211602880065, 6)
loss 641.5971069335938
Current State,action,reward,Response time,Next State:  (6, 16.84211602880065) 3 23.0 1511.32136729 (16.237094554670044, 7)
loss 428.6896667480469
Current State,action,reward,Response time,Next State:  (7, 16.237094554670044) 1 24.0 1387.67180358 (15.950694610794756, 6)
loss 456.9220886230469
Current State,action,reward,Response time,Next State:  (6, 15.950694610794756) 4 22.0 1452.36355164 (15.828704162850809, 8)
loss 549.5294799804688
Current State,action,reward,Response time,Next State:  (8, 15.828704162850809) 3 21.0 1296.14884991 (15.550833128512703, 9)
loss 417.71142578125
Current State,action,reward,Response time,Next State:  (9, 15.550833128512703) 4 19.0 1237.12691092 (15.446694946204717, 11)
loss 608.3958740234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 711.7413330078125
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 2 19.0 1200.39231205 (15.817158911312735, 11)
loss 541.8766479492188
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 1 20.0 1203.91462651 (15.829956988360925, 10)
loss 758.429931640625
Current State,action,reward,Response time,Next State:  (10, 15.829956988360925) 3 19.0 1217.34610485 (15.892373986997768, 11)
loss 517.4761352539062
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 616.5897827148438
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 412.4893798828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 1366.2281494140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 619.4761352539062
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 1453.1031494140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 1274.4307861328125
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 0 21.0 1221.34827555 (16.229253414601111, 9)
loss 617.4415893554688
Current State,action,reward,Response time,Next State:  (9, 16.229253414601111) 3 20.0 1272.63886489 (16.295120821876548, 10)
loss 599.8679809570312
Action +2 not possible so Scaled up by 1
Current State,action,reward,Response time,Next State:  (10, 16.295120821876548) 4 19.0 1242.02029803 (16.667936385136993, 11)
loss 766.48046875
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 4 19.0 1248.87152463 (16.836383524612351, 11)
loss 595.6502075195312
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 2 19.0 1257.77263112 (16.845818065953559, 11)
loss 416.2355651855469
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 2 19.0 1258.27117243 (17.052961248403161, 11)
loss 606.840087890625
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 2 19.0 1269.21706044 (17.215992726625572, 11)
loss 421.6844177246094
############ Running episode number: 47  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 1 21.0 1029.06659875 (11.973514343585284, 9)
loss 510.12066650390625
Current State,action,reward,Response time,Next State:  (9, 11.973514343585284) 3 20.0 1049.87192659 (11.786394321941378, 10)
loss 429.42578125
Current State,action,reward,Response time,Next State:  (10, 11.786394321941378) 3 19.0 1002.85899476 (11.61852219546234, 11)
loss 579.8580322265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 605.005859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 552.744873046875
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 2 19.0 967.160346038 (11.25610796929319, 11)
loss 426.06121826171875
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 1 20.0 962.898956888 (11.027107764209074, 10)
loss 608.8397216796875
Action +2 not possible so Scaled up by 1
Current State,action,reward,Response time,Next State:  (10, 11.027107764209074) 4 19.0 962.583328739 (10.995673623987257, 11)
loss 775.3683471679688
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 0 21.0 949.137050055 (10.931193889570471, 9)
loss 611.8143310546875
Current State,action,reward,Response time,Next State:  (9, 10.931193889570471) 0 23.0 995.311594677 (10.816918347608043, 7)
loss 1623.60986328125
Current State,action,reward,Response time,Next State:  (7, 10.816918347608043) 0 25.0 1049.90414092 (10.819208572963639, 5)
loss 456.62164306640625
Current State,action,reward,Response time,Next State:  (5, 10.819208572963639) 3 24.0 1251.65771315 (10.768325938188134, 6)
loss 1471.182373046875
Current State,action,reward,Response time,Next State:  (6, 10.768325938188134) 3 23.0 1109.60632067 (10.772009508959538, 7)
loss 417.6426696777344
Current State,action,reward,Response time,Next State:  (7, 10.772009508959538) 3 22.0 1047.1055689 (10.644925616761762, 8)
loss 414.7514953613281
Current State,action,reward,Response time,Next State:  (8, 10.644925616761762) 1 23.0 993.183975462 (10.58735855349979, 7)
loss 417.4756774902344
Current State,action,reward,Response time,Next State:  (7, 10.58735855349979) 3 22.0 1035.59872498 (10.552868829802469, 8)
loss 511.85821533203125
Current State,action,reward,Response time,Next State:  (8, 10.552868829802469) 2 22.0 987.80373542 (10.553846649940214, 8)
loss 585.0379028320312
Current State,action,reward,Response time,Next State:  (8, 10.553846649940214) 1 23.0 987.860883917 (10.489125480251131, 7)
loss 554.424072265625
Current State,action,reward,Response time,Next State:  (7, 10.489125480251131) 3 22.0 1029.47716098 (10.448897752470936, 8)
loss 1552.669921875
Current State,action,reward,Response time,Next State:  (8, 10.448897752470936) 3 21.0 981.727167119 (10.433149880183072, 9)
loss 412.2359313964844
Current State,action,reward,Response time,Next State:  (9, 10.433149880183072) 3 20.0 969.241448633 (10.44185150623065, 10)
loss 428.4924621582031
Current State,action,reward,Response time,Next State:  (10, 10.44185150623065) 3 19.0 931.538941947 (10.370942817486826, 11)
loss 404.2818603515625
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 0 21.0 916.124940439 (10.42733414151318, 9)
loss 527.4063110351562
Current State,action,reward,Response time,Next State:  (9, 10.42733414151318) 3 20.0 968.937023414 (10.388469398680568, 10)
loss 415.1587219238281
Current State,action,reward,Response time,Next State:  (10, 10.388469398680568) 2 20.0 928.707336523 (10.344006106602812, 10)
loss 606.585693359375
Current State,action,reward,Response time,Next State:  (10, 10.344006106602812) 3 19.0 926.348821567 (10.319026962956018, 11)
loss 421.0908203125
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 4 19.0 913.381595845 (10.30224719189987, 11)
loss 509.03076171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 434.6380615234375
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 4 19.0 911.223233653 (10.268274366284802, 11)
loss 414.238525390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 430.5936279296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 542.4453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 426.67138671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 578.8198852539062
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 429.8056640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 1751.35595703125
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 4 19.0 909.008683798 (10.369891240151098, 11)
loss 409.2165832519531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 2221.061279296875
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 0 21.0 913.272125304 (10.333617326102203, 9)
loss 424.3960876464844
Current State,action,reward,Response time,Next State:  (9, 10.333617326102203) 4 19.0 964.03141062 (10.390165524255663, 11)
loss 420.1289367675781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 607.4049682617188
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 419.1750183105469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 411.9480285644531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 576.3075561523438
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 1 20.0 929.522052234 (10.771376986314287, 10)
loss 1385.6732177734375
Current State,action,reward,Response time,Next State:  (10, 10.771376986314287) 3 19.0 949.018321829 (10.924797168745895, 11)
loss 419.2333984375
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 0 21.0 945.391786838 (11.039747673816453, 9)
loss 427.1004333496094
Current State,action,reward,Response time,Next State:  (9, 11.039747673816453) 2 21.0 1000.99384957 (11.271571944085663, 9)
loss 1368.8797607421875
Current State,action,reward,Response time,Next State:  (9, 11.271571944085663) 3 20.0 1013.12870607 (11.670334358779868, 10)
loss 608.4307250976562
Current State,action,reward,Response time,Next State:  (10, 11.670334358779868) 3 19.0 996.702699398 (11.819721938468785, 11)
loss 430.2554931640625
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 0 21.0 992.681522335 (12.19918626616789, 9)
loss 518.4107055664062
Current State,action,reward,Response time,Next State:  (9, 12.19918626616789) 3 20.0 1061.68473805 (12.501496275411796, 10)
loss 410.87786865234375
Action +2 not possible so Scaled up by 1
Current State,action,reward,Response time,Next State:  (10, 12.501496275411796) 4 19.0 1040.79092857 (13.168618569876575, 11)
loss 413.4464111328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 574.7984619140625
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 0 21.0 1089.37925646 (14.283719188889453, 9)
loss 584.8424682617188
Current State,action,reward,Response time,Next State:  (9, 14.283719188889453) 3 20.0 1170.79974938 (14.677479537099185, 10)
loss 551.6474609375
Current State,action,reward,Response time,Next State:  (10, 14.677479537099185) 3 19.0 1156.21398489 (15.353965082180355, 11)
loss 452.4949951171875
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 2 19.0 1179.43847566 (15.836943704090487, 11)
loss 895.5451049804688
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 1260.5772705078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 1263.7528076171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 419.9073181152344
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 2 19.0 1294.6873044 (17.669285735563751, 11)
loss 442.2996520996094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 602.5570068359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 415.1378173828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 433.7390441894531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 414.1628723144531
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 2 19.0 1373.60319427 (19.286321916040979, 11)
loss 406.7234191894531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 949.6025390625
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 2 19.0 1390.09363446 (19.213467265587269, 11)
loss 1444.4083251953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 601.489501953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 419.37969970703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 1442.6163330078125
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 0 21.0 1383.93777195 (19.25591252280865, 9)
loss 529.9261474609375
Current State,action,reward,Response time,Next State:  (9, 19.25591252280865) 3 20.0 1431.06953264 (19.08360399753829, 10)
loss 417.06640625
Current State,action,reward,Response time,Next State:  (10, 19.08360399753829) 3 19.0 1389.93285614 (18.668181536495972, 11)
loss 413.2663269042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 411.0213317871094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 431.21337890625
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 1 20.0 1310.13203606 (17.229782241685768, 10)
loss 614.3974609375
Current State,action,reward,Response time,Next State:  (10, 17.229782241685768) 3 19.0 1291.59856437 (16.84211602880065, 11)
loss 439.0425720214844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 550.2035522460938
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 0 21.0 1226.10492247 (15.950694610794756, 9)
loss 410.3786315917969
Current State,action,reward,Response time,Next State:  (9, 15.950694610794756) 2 21.0 1258.0576862 (15.828704162850809, 9)
loss 417.6405029296875
Current State,action,reward,Response time,Next State:  (9, 15.828704162850809) 3 20.0 1251.67208827 (15.550833128512703, 10)
loss 424.55377197265625
Current State,action,reward,Response time,Next State:  (10, 15.550833128512703) 2 20.0 1202.54023315 (15.446694946204717, 10)
loss 415.150146484375
Current State,action,reward,Response time,Next State:  (10, 15.446694946204717) 3 19.0 1197.01631782 (15.750501603468638, 11)
loss 423.591796875
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 2 19.0 1200.39231205 (15.817158911312735, 11)
loss 428.177001953125
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 2 19.0 1203.91462651 (15.829956988360925, 11)
loss 455.7660827636719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 411.8075866699219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 419.6589050292969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 561.7017211914062
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 2 19.0 1213.81868812 (16.017694914042416, 11)
loss 446.40130615234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 451.4104919433594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 611.62451171875
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 0 21.0 1219.63501821 (16.147078378791146, 9)
loss 464.0350646972656
Current State,action,reward,Response time,Next State:  (9, 16.147078378791146) 3 20.0 1268.3374073 (16.229253414601111, 10)
loss 415.0632019042969
Action +2 not possible so Scaled up by 1
Current State,action,reward,Response time,Next State:  (10, 16.229253414601111) 4 19.0 1238.52642122 (16.295120821876548, 11)
loss 415.48992919921875
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 1 20.0 1229.17115431 (16.667936385136993, 10)
loss 409.8255310058594
Current State,action,reward,Response time,Next State:  (10, 16.667936385136993) 0 22.0 1261.79596106 (16.836383524612351, 8)
loss 420.87835693359375
Current State,action,reward,Response time,Next State:  (8, 16.836383524612351) 3 21.0 1355.04246364 (16.845818065953559, 9)
loss 403.85968017578125
Current State,action,reward,Response time,Next State:  (9, 16.845818065953559) 4 19.0 1304.91298164 (17.052961248403161, 11)
loss 608.7224731445312
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 459.5818786621094
############ Running episode number: 48  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 601.4411010742188
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 414.4996643066406
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 4 19.0 990.920419923 (11.61852219546234, 11)
loss 409.05523681640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 421.796875
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 0 21.0 974.154538113 (11.336751742492702, 9)
loss 590.6053466796875
Current State,action,reward,Response time,Next State:  (9, 11.336751742492702) 4 19.0 1016.54054685 (11.25610796929319, 11)
loss 418.0525817871094
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 4 19.0 962.898956888 (11.027107764209074, 11)
loss 615.1925659179688
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 458.8141784667969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 453.556640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 426.0525817871094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 407.3077087402344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 420.0198669433594
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 1 20.0 937.12351295 (10.772009508959538, 10)
loss 422.9248352050781
Current State,action,reward,Response time,Next State:  (10, 10.772009508959538) 3 19.0 949.051873418 (10.644925616761762, 11)
loss 429.95269775390625
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 2 19.0 930.602776506 (10.58735855349979, 11)
loss 403.7227478027344
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 1 20.0 927.560809977 (10.552868829802469, 10)
loss 417.56402587890625
Current State,action,reward,Response time,Next State:  (10, 10.552868829802469) 3 19.0 937.427755072 (10.553846649940214, 11)
loss 414.8483581542969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 425.3098449707031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 414.4115905761719
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 1 20.0 920.244245637 (10.433149880183072, 10)
loss 416.6392517089844
Current State,action,reward,Response time,Next State:  (10, 10.433149880183072) 1 21.0 931.077372094 (10.44185150623065, 9)
loss 414.1781921386719
Current State,action,reward,Response time,Next State:  (9, 10.44185150623065) 3 20.0 969.696935814 (10.370942817486826, 10)
loss 412.5024719238281
Current State,action,reward,Response time,Next State:  (10, 10.370942817486826) 2 20.0 927.777654938 (10.42733414151318, 10)
loss 423.56610107421875
Current State,action,reward,Response time,Next State:  (10, 10.42733414151318) 1 21.0 930.768881517 (10.388469398680568, 9)
loss 414.08837890625
Current State,action,reward,Response time,Next State:  (9, 10.388469398680568) 3 20.0 966.902645924 (10.344006106602812, 10)
loss 406.726806640625
Action +2 not possible so Scaled up by 1
Current State,action,reward,Response time,Next State:  (10, 10.344006106602812) 4 19.0 926.348821567 (10.319026962956018, 11)
loss 405.4908447265625
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 0 21.0 913.381595845 (10.30224719189987, 9)
loss 414.67608642578125
Current State,action,reward,Response time,Next State:  (9, 10.30224719189987) 3 20.0 962.389338906 (10.278181486298042, 10)
loss 411.65106201171875
Current State,action,reward,Response time,Next State:  (10, 10.278181486298042) 0 22.0 922.857214352 (10.268274366284802, 8)
loss 410.73272705078125
Current State,action,reward,Response time,Next State:  (8, 10.268274366284802) 0 24.0 971.170670341 (10.335411397720526, 6)
loss 431.0458679199219
Current State,action,reward,Response time,Next State:  (6, 10.335411397720526) 2 24.0 1080.97373999 (10.305649118067803, 6)
loss 434.0579833984375
Current State,action,reward,Response time,Next State:  (6, 10.305649118067803) 4 22.0 1079.00528942 (10.24826025489064, 8)
loss 405.7825927734375
Current State,action,reward,Response time,Next State:  (8, 10.24826025489064) 1 23.0 970.000949704 (10.276491935146446, 7)
loss 408.9662780761719
Current State,action,reward,Response time,Next State:  (7, 10.276491935146446) 1 24.0 1016.22653355 (10.236991269871366, 6)
loss 458.8747863769531
Current State,action,reward,Response time,Next State:  (6, 10.236991269871366) 3 23.0 1074.46432072 (10.236272697871373, 7)
loss 408.8544006347656
Current State,action,reward,Response time,Next State:  (7, 10.236272697871373) 3 22.0 1013.72020216 (10.369891240151098, 8)
loss 454.5673828125
Current State,action,reward,Response time,Next State:  (8, 10.369891240151098) 3 21.0 977.109647703 (10.316955310454549, 9)
loss 407.0250549316406
Current State,action,reward,Response time,Next State:  (9, 10.316955310454549) 1 22.0 963.159236328 (10.333617326102203, 8)
loss 593.1444091796875
Current State,action,reward,Response time,Next State:  (8, 10.333617326102203) 0 24.0 974.989626232 (10.390165524255663, 6)
loss 560.6353149414062
Current State,action,reward,Response time,Next State:  (6, 10.390165524255663) 3 23.0 1084.59512897 (10.425974763084863, 7)
loss 408.88519287109375
Current State,action,reward,Response time,Next State:  (7, 10.425974763084863) 2 23.0 1025.54181472 (10.546025383098053, 7)
loss 413.61590576171875
Current State,action,reward,Response time,Next State:  (7, 10.546025383098053) 2 23.0 1033.02297692 (10.655373370049301, 7)
loss 414.92950439453125
Current State,action,reward,Response time,Next State:  (7, 10.655373370049301) 1 24.0 1039.83718601 (10.624473674922116, 6)
loss 605.4585571289062
Current State,action,reward,Response time,Next State:  (6, 10.624473674922116) 3 23.0 1100.09206058 (10.771376986314287, 7)
loss 412.2804870605469
Current State,action,reward,Response time,Next State:  (7, 10.771376986314287) 4 21.0 1047.06615216 (10.924797168745895, 9)
loss 400.9492492675781
Current State,action,reward,Response time,Next State:  (9, 10.924797168745895) 0 23.0 994.97675791 (11.039747673816453, 7)
loss 425.0900573730469
Current State,action,reward,Response time,Next State:  (7, 11.039747673816453) 2 23.0 1063.7901361 (11.271571944085663, 7)
loss 424.3017883300781
Current State,action,reward,Response time,Next State:  (7, 11.271571944085663) 2 23.0 1078.23666679 (11.670334358779868, 7)
loss 603.552001953125
Current State,action,reward,Response time,Next State:  (7, 11.670334358779868) 3 22.0 1103.08623692 (11.819721938468785, 8)
loss 466.9069519042969
Current State,action,reward,Response time,Next State:  (8, 11.819721938468785) 2 22.0 1061.84470565 (12.19918626616789, 8)
loss 414.9711608886719
Current State,action,reward,Response time,Next State:  (8, 12.19918626616789) 2 22.0 1084.02242049 (12.501496275411796, 8)
loss 407.2023010253906
Current State,action,reward,Response time,Next State:  (8, 12.501496275411796) 3 21.0 1101.69086701 (13.168618569876575, 9)
loss 421.8220520019531
Current State,action,reward,Response time,Next State:  (9, 13.168618569876575) 2 21.0 1112.429735 (13.649658108197247, 9)
loss 411.9335021972656
Current State,action,reward,Response time,Next State:  (9, 13.649658108197247) 4 19.0 1137.6097809 (14.283719188889453, 11)
loss 586.398193359375
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 2 19.0 1122.88439768 (14.677479537099185, 11)
loss 608.1195068359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 546.6906127929688
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 0 21.0 1179.43847566 (15.836943704090487, 9)
loss 410.6437072753906
Current State,action,reward,Response time,Next State:  (9, 15.836943704090487) 3 20.0 1252.10338759 (16.466876895473597, 10)
loss 410.6376953125
Current State,action,reward,Response time,Next State:  (10, 16.466876895473597) 3 19.0 1251.130943 (16.871606159345866, 11)
loss 413.4617614746094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 409.8889465332031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 425.658935546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 432.629150390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 551.0905151367188
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 1 20.0 1339.64749699 (18.671267839956315, 10)
loss 413.8419494628906
Current State,action,reward,Response time,Next State:  (10, 18.671267839956315) 3 19.0 1368.06085906 (19.02839494033929, 11)
loss 427.1347961425781
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 0 21.0 1373.60319427 (19.286321916040979, 9)
loss 420.3189392089844
Current State,action,reward,Response time,Next State:  (9, 19.286321916040979) 3 20.0 1432.66131431 (19.340464848017284, 10)
loss 456.6932067871094
Current State,action,reward,Response time,Next State:  (10, 19.340464848017284) 2 20.0 1403.55780672 (19.213467265587269, 10)
loss 603.6148681640625
Current State,action,reward,Response time,Next State:  (10, 19.213467265587269) 3 19.0 1396.82133527 (19.140765783401285, 11)
loss 418.7995910644531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 461.95819091796875
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 0 21.0 1392.48057747 (19.223969507401588, 9)
loss 595.7783203125
Current State,action,reward,Response time,Next State:  (9, 19.223969507401588) 3 20.0 1429.39747342 (19.25591252280865, 10)
loss 418.9583435058594
Current State,action,reward,Response time,Next State:  (10, 19.25591252280865) 3 19.0 1399.0728054 (19.08360399753829, 11)
loss 612.02294921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 419.4586181640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 410.0869445800781
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 2 19.0 1339.12370397 (17.82724819986867, 11)
loss 568.119140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 425.7281494140625
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 4 19.0 1278.56065924 (16.84211602880065, 11)
loss 610.4100341796875
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 1 20.0 1258.07554888 (16.237094554670044, 10)
loss 417.09832763671875
Current State,action,reward,Response time,Next State:  (10, 16.237094554670044) 3 19.0 1238.94234737 (15.950694610794756, 11)
loss 412.2889099121094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 601.1132202148438
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 2 19.0 1204.52470225 (15.550833128512703, 11)
loss 616.3035278320312
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 425.1096496582031
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 2 19.0 1184.33851965 (15.750501603468638, 11)
loss 416.971923828125
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 4 19.0 1200.39231205 (15.817158911312735, 11)
loss 406.02764892578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 451.6455383300781
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 1 20.0 1204.59090422 (15.892373986997768, 10)
loss 611.238037109375
Current State,action,reward,Response time,Next State:  (10, 15.892373986997768) 2 20.0 1220.65695786 (15.954793861767499, 10)
loss 415.6210021972656
Current State,action,reward,Response time,Next State:  (10, 15.954793861767499) 1 21.0 1223.96796344 (16.004586266677634, 9)
loss 405.6148986816406
Current State,action,reward,Response time,Next State:  (9, 16.004586266677634) 3 20.0 1260.87864843 (16.017694914042416, 10)
loss 422.9673767089844
Action +2 not possible so Scaled up by 1
Current State,action,reward,Response time,Next State:  (10, 16.017694914042416) 4 19.0 1227.30449265 (15.947547279389703, 11)
loss 542.7395629882812
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 1 20.0 1210.80462626 (16.11465619633363, 10)
loss 416.9072570800781
Action +2 not possible so Scaled up by 1
Current State,action,reward,Response time,Next State:  (10, 16.11465619633363) 4 19.0 1232.44771583 (16.147078378791146, 11)
loss 455.1497497558594
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 4 19.0 1221.34827555 (16.229253414601111, 11)
loss 407.49053955078125
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 4 19.0 1225.69057988 (16.295120821876548, 11)
loss 539.1990356445312
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 438.8006286621094
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 4 19.0 1248.87152463 (16.836383524612351, 11)
loss 429.9068603515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 543.2623901367188
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 417.22528076171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 415.63812255859375
############ Running episode number: 49  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 0 22.0 1029.06659875 (11.973514343585284, 8)
loss 411.6507263183594
Current State,action,reward,Response time,Next State:  (8, 11.973514343585284) 3 21.0 1070.83307124 (11.786394321941378, 9)
loss 422.9349060058594
Current State,action,reward,Response time,Next State:  (9, 11.786394321941378) 0 23.0 1040.0771169 (11.61852219546234, 7)
loss 531.9929809570312
Current State,action,reward,Response time,Next State:  (7, 11.61852219546234) 0 25.0 1099.85747227 (11.469111876584304, 5)
loss 418.79608154296875
Current State,action,reward,Response time,Next State:  (5, 11.469111876584304) 3 24.0 1311.37227212 (11.336751742492702, 6)
loss 416.83953857421875
Current State,action,reward,Response time,Next State:  (6, 11.336751742492702) 3 23.0 1147.20149519 (11.25610796929319, 7)
loss 732.4479370117188
Current State,action,reward,Response time,Next State:  (7, 11.25610796929319) 3 22.0 1077.27300243 (11.027107764209074, 8)
loss 408.1920471191406
Current State,action,reward,Response time,Next State:  (8, 11.027107764209074) 2 22.0 1015.52053272 (10.995673623987257, 8)
loss 402.19561767578125
Current State,action,reward,Response time,Next State:  (8, 10.995673623987257) 2 22.0 1013.68337084 (10.931193889570471, 8)
loss 544.1305541992188
Current State,action,reward,Response time,Next State:  (8, 10.931193889570471) 2 22.0 1009.91486598 (10.816918347608043, 8)
loss 553.0755004882812
Current State,action,reward,Response time,Next State:  (8, 10.816918347608043) 1 23.0 1003.23605536 (10.819208572963639, 7)
loss 543.112548828125
Current State,action,reward,Response time,Next State:  (7, 10.819208572963639) 3 22.0 1050.04686027 (10.768325938188134, 8)
loss 412.2047119140625
Current State,action,reward,Response time,Next State:  (8, 10.768325938188134) 2 22.0 1000.39608195 (10.772009508959538, 8)
loss 415.88232421875
Current State,action,reward,Response time,Next State:  (8, 10.772009508959538) 3 21.0 1000.61136749 (10.644925616761762, 9)
loss 404.00823974609375
Current State,action,reward,Response time,Next State:  (9, 10.644925616761762) 2 21.0 980.32686333 (10.58735855349979, 9)
loss 404.5552673339844
Current State,action,reward,Response time,Next State:  (9, 10.58735855349979) 4 19.0 977.313511661 (10.552868829802469, 11)
loss 587.98974609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 777.0877685546875
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 0 21.0 925.789969445 (10.489125480251131, 9)
loss 402.0357971191406
Current State,action,reward,Response time,Next State:  (9, 10.489125480251131) 3 20.0 972.171495057 (10.448897752470936, 10)
loss 419.1392517089844
Current State,action,reward,Response time,Next State:  (10, 10.448897752470936) 3 19.0 931.912703681 (10.433149880183072, 11)
loss 425.973388671875
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 1 20.0 919.412094444 (10.44185150623065, 10)
loss 426.0027160644531
Current State,action,reward,Response time,Next State:  (10, 10.44185150623065) 3 19.0 931.538941947 (10.370942817486826, 11)
loss 400.3706970214844
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 2 19.0 916.124940439 (10.42733414151318, 11)
loss 585.9840698242188
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 1 20.0 919.104778396 (10.388469398680568, 10)
loss 416.56463623046875
Current State,action,reward,Response time,Next State:  (10, 10.388469398680568) 3 19.0 928.707336523 (10.344006106602812, 11)
loss 417.71771240234375
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 2 19.0 914.701547126 (10.319026962956018, 11)
loss 408.9986267089844
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 4 19.0 913.381595845 (10.30224719189987, 11)
loss 411.2225036621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 426.34185791015625
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 2 19.0 911.223233653 (10.268274366284802, 11)
loss 547.968994140625
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 4 19.0 910.69972028 (10.335411397720526, 11)
loss 411.77618408203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 418.29058837890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 541.2298583984375
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 4 19.0 909.642131904 (10.276491935146446, 11)
loss 551.6074829101562
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 603.99658203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 417.53143310546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 427.0343017578125
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 0 21.0 916.069372847 (10.316955310454549, 9)
loss 416.77520751953125
Current State,action,reward,Response time,Next State:  (9, 10.316955310454549) 2 21.0 963.159236328 (10.333617326102203, 9)
loss 409.2169494628906
Current State,action,reward,Response time,Next State:  (9, 10.333617326102203) 2 21.0 964.03141062 (10.390165524255663, 9)
loss 547.983154296875
Current State,action,reward,Response time,Next State:  (9, 10.390165524255663) 4 19.0 966.991429728 (10.425974763084863, 11)
loss 409.9344787597656
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 1 20.0 919.032945938 (10.546025383098053, 10)
loss 418.6051025390625
Current State,action,reward,Response time,Next State:  (10, 10.546025383098053) 3 19.0 937.064750655 (10.655373370049301, 11)
loss 416.5033264160156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 649.488037109375
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 4 19.0 929.522052234 (10.771376986314287, 11)
loss 418.51934814453125
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 2 19.0 937.284736847 (10.924797168745895, 11)
loss 415.5863037109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 413.0160217285156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 456.73211669921875
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 1 20.0 963.716106332 (11.670334358779868, 10)
loss 642.3933715820312
Current State,action,reward,Response time,Next State:  (10, 11.670334358779868) 3 19.0 996.702699398 (11.819721938468785, 11)
loss 421.4344177246094
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 0 21.0 992.681522335 (12.19918626616789, 9)
loss 413.26177978515625
Current State,action,reward,Response time,Next State:  (9, 12.19918626616789) 4 19.0 1061.68473805 (12.501496275411796, 11)
loss 604.91015625
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 4 19.0 1028.70793389 (13.168618569876575, 11)
loss 418.1791687011719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 454.8204650878906
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 0 21.0 1089.37925646 (14.283719188889453, 9)
loss 453.9863586425781
Current State,action,reward,Response time,Next State:  (9, 14.283719188889453) 4 19.0 1170.79974938 (14.677479537099185, 11)
loss 421.9708251953125
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 0 21.0 1143.69153516 (15.353965082180355, 9)
loss 416.6817932128906
Current State,action,reward,Response time,Next State:  (9, 15.353965082180355) 0 23.0 1226.82184023 (15.836943704090487, 7)
loss 426.88934326171875
Current State,action,reward,Response time,Next State:  (7, 15.836943704090487) 3 22.0 1362.73571067 (16.466876895473597, 8)
loss 446.3008117675781
Current State,action,reward,Response time,Next State:  (8, 16.466876895473597) 3 21.0 1333.44672445 (16.871606159345866, 9)
loss 413.2052917480469
Current State,action,reward,Response time,Next State:  (9, 16.871606159345866) 3 20.0 1306.26286107 (17.534967586021782, 10)
loss 406.3385925292969
Current State,action,reward,Response time,Next State:  (10, 17.534967586021782) 3 19.0 1307.78684385 (17.669285735563751, 11)
loss 591.3819580078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 412.7881774902344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 409.7132873535156
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 0 21.0 1339.64749699 (18.671267839956315, 9)
loss 406.3018798828125
Current State,action,reward,Response time,Next State:  (9, 18.671267839956315) 3 20.0 1400.46626871 (19.02839494033929, 10)
loss 420.26702880859375
Current State,action,reward,Response time,Next State:  (10, 19.02839494033929) 1 21.0 1387.00434183 (19.286321916040979, 9)
loss 415.93768310546875
Current State,action,reward,Response time,Next State:  (9, 19.286321916040979) 0 23.0 1432.66131431 (19.340464848017284, 7)
loss 417.3559265136719
Current State,action,reward,Response time,Next State:  (7, 19.340464848017284) 4 21.0 1581.06369535 (19.213467265587269, 9)
loss 417.1002197265625
Current State,action,reward,Response time,Next State:  (9, 19.213467265587269) 1 22.0 1428.84773289 (19.140765783401285, 8)
loss 412.27862548828125
Current State,action,reward,Response time,Next State:  (8, 19.140765783401285) 3 21.0 1489.72161235 (19.385636054792762, 9)
loss 425.5955505371094
Current State,action,reward,Response time,Next State:  (9, 19.385636054792762) 0 23.0 1437.85991935 (19.223969507401588, 7)
loss 601.2249755859375
Current State,action,reward,Response time,Next State:  (7, 19.223969507401588) 3 22.0 1573.80408654 (19.25591252280865, 8)
loss 421.5533447265625
Current State,action,reward,Response time,Next State:  (8, 19.25591252280865) 1 23.0 1496.45133993 (19.08360399753829, 7)
loss 426.6429138183594
Current State,action,reward,Response time,Next State:  (7, 19.08360399753829) 4 21.0 1565.05696683 (18.668181536495972, 9)
loss 416.27197265625
Current State,action,reward,Response time,Next State:  (9, 18.668181536495972) 3 20.0 1400.30471596 (18.375894992990247, 10)
loss 422.00634765625
Action +2 not possible so Scaled up by 1
Current State,action,reward,Response time,Next State:  (10, 18.375894992990247) 4 19.0 1352.39307459 (17.82724819986867, 11)
loss 421.0092468261719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 735.36474609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 412.5306701660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 416.2908630371094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 424.0127258300781
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 4 19.0 1210.97093797 (15.828704162850809, 11)
loss 399.5279235839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 403.9450988769531
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 4 19.0 1189.84140354 (15.446694946204717, 11)
loss 592.8313598632812
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 1 20.0 1184.33851965 (15.750501603468638, 10)
loss 409.8517150878906
Current State,action,reward,Response time,Next State:  (10, 15.750501603468638) 3 19.0 1213.1314661 (15.817158911312735, 11)
loss 427.0782775878906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 605.289794921875
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 0 21.0 1204.59090422 (15.892373986997768, 9)
loss 414.8651428222656
Current State,action,reward,Response time,Next State:  (9, 15.892373986997768) 3 20.0 1255.00488935 (15.954793861767499, 10)
loss 447.9554138183594
Current State,action,reward,Response time,Next State:  (10, 15.954793861767499) 2 20.0 1223.96796344 (16.004586266677634, 10)
loss 562.3958740234375
Current State,action,reward,Response time,Next State:  (10, 16.004586266677634) 0 22.0 1226.60915635 (16.017694914042416, 8)
loss 418.3118896484375
Current State,action,reward,Response time,Next State:  (8, 16.017694914042416) 3 21.0 1307.19437562 (15.947547279389703, 9)
loss 420.20269775390625
Current State,action,reward,Response time,Next State:  (9, 15.947547279389703) 2 21.0 1257.89293893 (16.11465619633363, 9)
loss 408.0174560546875
Current State,action,reward,Response time,Next State:  (9, 16.11465619633363) 3 20.0 1266.64026605 (16.147078378791146, 10)
loss 423.7178039550781
Action +2 not possible so Scaled up by 1
Current State,action,reward,Response time,Next State:  (10, 16.147078378791146) 4 19.0 1234.16752106 (16.229253414601111, 11)
loss 429.12542724609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 415.19061279296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 729.9873657226562
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 422.8730773925781
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 0 21.0 1257.77263112 (16.845818065953559, 9)
loss 425.749755859375
Current State,action,reward,Response time,Next State:  (9, 16.845818065953559) 1 22.0 1304.91298164 (17.052961248403161, 8)
loss 422.7039489746094
Current State,action,reward,Response time,Next State:  (8, 17.052961248403161) 1 23.0 1367.70030431 (17.215992726625572, 7)
loss 423.707763671875
############ Running episode number: 50  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 416.07684326171875
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 1 20.0 1000.80824137 (11.786394321941378, 10)
loss 428.17474365234375
Current State,action,reward,Response time,Next State:  (10, 11.786394321941378) 3 19.0 1002.85899476 (11.61852219546234, 11)
loss 604.5614624023438
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 421.5291748046875
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 2 19.0 974.154538113 (11.336751742492702, 11)
loss 417.2900390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 421.50244140625
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 4 19.0 962.898956888 (11.027107764209074, 11)
loss 414.0739440917969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 398.7369079589844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 547.7593383789062
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 0 21.0 945.729803224 (10.816918347608043, 9)
loss 426.9721984863281
Current State,action,reward,Response time,Next State:  (9, 10.816918347608043) 2 21.0 989.329834005 (10.819208572963639, 9)
loss 608.5868530273438
Current State,action,reward,Response time,Next State:  (9, 10.819208572963639) 3 20.0 989.449716 (10.768325938188134, 10)
loss 409.7531433105469
Action +2 not possible so Scaled up by 1
Current State,action,reward,Response time,Next State:  (10, 10.768325938188134) 4 19.0 948.856481751 (10.772009508959538, 11)
loss 419.7083435058594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 621.4795532226562
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 1 20.0 930.602776506 (10.58735855349979, 10)
loss 415.6130676269531
Current State,action,reward,Response time,Next State:  (10, 10.58735855349979) 1 21.0 939.257231149 (10.552868829802469, 9)
loss 443.6061706542969
Current State,action,reward,Response time,Next State:  (9, 10.552868829802469) 3 20.0 975.508144832 (10.553846649940214, 10)
loss 451.5210266113281
Current State,action,reward,Response time,Next State:  (10, 10.553846649940214) 3 19.0 937.479622653 (10.489125480251131, 11)
loss 427.27777099609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 417.8668212890625
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 0 21.0 920.244245637 (10.433149880183072, 9)
loss 419.6628723144531
Current State,action,reward,Response time,Next State:  (9, 10.433149880183072) 3 20.0 969.241448633 (10.44185150623065, 10)
loss 533.222412109375
Current State,action,reward,Response time,Next State:  (10, 10.44185150623065) 3 19.0 931.538941947 (10.370942817486826, 11)
loss 611.8660888671875
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 4 19.0 916.124940439 (10.42733414151318, 11)
loss 745.3718872070312
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 420.84454345703125
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 1 20.0 917.051082408 (10.344006106602812, 10)
loss 585.480712890625
Current State,action,reward,Response time,Next State:  (10, 10.344006106602812) 3 19.0 926.348821567 (10.319026962956018, 11)
loss 450.19952392578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 419.70172119140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 607.3033447265625
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 4 19.0 911.223233653 (10.268274366284802, 11)
loss 592.8291015625
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 2 19.0 910.69972028 (10.335411397720526, 11)
loss 443.48492431640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 416.6914367675781
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 1 20.0 912.67468196 (10.24826025489064, 10)
loss 445.5116271972656
Current State,action,reward,Response time,Next State:  (10, 10.24826025489064) 3 19.0 921.2700698 (10.276491935146446, 11)
loss 410.0061950683594
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 2 19.0 911.133954163 (10.236991269871366, 11)
loss 405.685546875
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 2 19.0 909.046654676 (10.236272697871373, 11)
loss 591.87841796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 411.0533142089844
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 4 19.0 916.069372847 (10.316955310454549, 11)
loss 417.2004089355469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 605.3768310546875
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 0 21.0 914.152581784 (10.390165524255663, 9)
loss 604.5467529296875
Current State,action,reward,Response time,Next State:  (9, 10.390165524255663) 3 20.0 966.991429728 (10.425974763084863, 10)
loss 602.3016357421875
Action +2 not possible so Scaled up by 1
Current State,action,reward,Response time,Next State:  (10, 10.425974763084863) 4 19.0 930.696774523 (10.546025383098053, 11)
loss 405.2763366699219
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 1 20.0 925.376677007 (10.655373370049301, 10)
loss 412.9815673828125
Current State,action,reward,Response time,Next State:  (10, 10.655373370049301) 0 22.0 942.865015335 (10.624473674922116, 8)
loss 926.5594482421875
Current State,action,reward,Response time,Next State:  (8, 10.624473674922116) 2 22.0 991.988665914 (10.771376986314287, 8)
loss 412.3397521972656
Current State,action,reward,Response time,Next State:  (8, 10.771376986314287) 3 21.0 1000.57439983 (10.924797168745895, 9)
loss 453.0654296875
Current State,action,reward,Response time,Next State:  (9, 10.924797168745895) 3 20.0 994.97675791 (11.039747673816453, 10)
loss 424.06976318359375
Current State,action,reward,Response time,Next State:  (10, 11.039747673816453) 3 19.0 963.253801267 (11.271571944085663, 11)
loss 411.97308349609375
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 4 19.0 963.716106332 (11.670334358779868, 11)
loss 590.9693603515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 417.1705322265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 616.407470703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 592.2625732421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 410.882080078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 592.9292602539062
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 412.59014892578125
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 0 21.0 1122.88439768 (14.677479537099185, 9)
loss 408.12969970703125
Current State,action,reward,Response time,Next State:  (9, 14.677479537099185) 2 21.0 1191.41116041 (15.353965082180355, 9)
loss 418.1552429199219
Current State,action,reward,Response time,Next State:  (9, 15.353965082180355) 0 23.0 1226.82184023 (15.836943704090487, 7)
loss 410.3343811035156
Current State,action,reward,Response time,Next State:  (7, 15.836943704090487) 0 25.0 1362.73571067 (16.466876895473597, 5)
loss 601.6839599609375
Current State,action,reward,Response time,Next State:  (5, 16.466876895473597) 0 -94.8582850434 1770.57802209 (16.871606159345866, 3)
loss 416.47503662109375
Current State,action,reward,Response time,Next State:  (3, 16.871606159345866) 2 -107.904695815 3218.58285043 (17.534967586021782, 3)
loss 546.0765991210938
Current State,action,reward,Response time,Next State:  (3, 17.534967586021782) 2 -110.546347383 3349.04695815 (17.669285735563751, 3)
loss 413.3070068359375
Current State,action,reward,Response time,Next State:  (3, 17.669285735563751) 3 3.63583334771 3375.46347383 (17.944480812078613, 4)
loss 618.841796875
Current State,action,reward,Response time,Next State:  (4, 17.944480812078613) 4 24.0 2223.64166652 (18.385807405229915, 6)
loss 587.6681518554688
Current State,action,reward,Response time,Next State:  (6, 18.385807405229915) 2 24.0 1613.41973487 (18.671267839956315, 6)
loss 420.61517333984375
Current State,action,reward,Response time,Next State:  (6, 18.671267839956315) 3 23.0 1632.29983282 (19.02839494033929, 7)
loss 443.9028015136719
Current State,action,reward,Response time,Next State:  (7, 19.02839494033929) 1 24.0 1561.61651886 (19.286321916040979, 6)
loss 416.9418640136719
Current State,action,reward,Response time,Next State:  (6, 19.286321916040979) 3 23.0 1672.97895956 (19.340464848017284, 7)
loss 604.886962890625
Current State,action,reward,Response time,Next State:  (7, 19.340464848017284) 3 22.0 1581.06369535 (19.213467265587269, 8)
loss 404.91259765625
Current State,action,reward,Response time,Next State:  (8, 19.213467265587269) 3 21.0 1493.97063558 (19.140765783401285, 9)
loss 443.3556823730469
Current State,action,reward,Response time,Next State:  (9, 19.140765783401285) 3 20.0 1425.04216908 (19.385636054792762, 10)
loss 413.4912414550781
Current State,action,reward,Response time,Next State:  (10, 19.385636054792762) 0 22.0 1405.95387237 (19.223969507401588, 8)
loss 644.564697265625
Current State,action,reward,Response time,Next State:  (8, 19.223969507401588) 3 21.0 1494.58443695 (19.25591252280865, 9)
loss 535.9124755859375
Current State,action,reward,Response time,Next State:  (9, 19.25591252280865) 4 19.0 1431.06953264 (19.08360399753829, 11)
loss 410.4778747558594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 420.033447265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 413.4279479980469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 592.5643310546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 415.12530517578125
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 0 21.0 1278.56065924 (16.84211602880065, 9)
loss 542.7923583984375
Current State,action,reward,Response time,Next State:  (9, 16.84211602880065) 3 20.0 1304.71919827 (16.237094554670044, 10)
loss 399.5296630859375
Current State,action,reward,Response time,Next State:  (10, 16.237094554670044) 0 22.0 1238.94234737 (15.950694610794756, 8)
loss 415.6269836425781
Current State,action,reward,Response time,Next State:  (8, 15.950694610794756) 3 21.0 1303.27855664 (15.828704162850809, 9)
loss 405.0822448730469
Current State,action,reward,Response time,Next State:  (9, 15.828704162850809) 3 20.0 1251.67208827 (15.550833128512703, 10)
loss 442.8049011230469
Current State,action,reward,Response time,Next State:  (10, 15.550833128512703) 3 19.0 1202.54023315 (15.446694946204717, 11)
loss 412.80963134765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 460.44549560546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 419.2109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 501.6302490234375
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 0 21.0 1204.59090422 (15.892373986997768, 9)
loss 605.8909912109375
Current State,action,reward,Response time,Next State:  (9, 15.892373986997768) 3 20.0 1255.00488935 (15.954793861767499, 10)
loss 496.2471008300781
Current State,action,reward,Response time,Next State:  (10, 15.954793861767499) 0 22.0 1223.96796344 (16.004586266677634, 8)
loss 414.3641052246094
Current State,action,reward,Response time,Next State:  (8, 16.004586266677634) 3 21.0 1306.42824342 (16.017694914042416, 9)
loss 417.63031005859375
Current State,action,reward,Response time,Next State:  (9, 16.017694914042416) 3 20.0 1261.56482143 (15.947547279389703, 10)
loss 516.7412109375
Current State,action,reward,Response time,Next State:  (10, 15.947547279389703) 2 20.0 1223.58357506 (16.11465619633363, 10)
loss 409.0735778808594
Current State,action,reward,Response time,Next State:  (10, 16.11465619633363) 2 20.0 1232.44771583 (16.147078378791146, 10)
loss 657.4354858398438
Current State,action,reward,Response time,Next State:  (10, 16.147078378791146) 0 22.0 1234.16752106 (16.229253414601111, 8)
loss 412.5946350097656
Current State,action,reward,Response time,Next State:  (8, 16.229253414601111) 4 20.0 1319.55886882 (16.295120821876548, 10)
loss 414.4715881347656
Current State,action,reward,Response time,Next State:  (10, 16.295120821876548) 3 19.0 1242.02029803 (16.667936385136993, 11)
loss 573.2164306640625
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 1 20.0 1248.87152463 (16.836383524612351, 10)
loss 602.0882568359375
Current State,action,reward,Response time,Next State:  (10, 16.836383524612351) 2 20.0 1270.73108663 (16.845818065953559, 10)
loss 533.787353515625
Current State,action,reward,Response time,Next State:  (10, 16.845818065953559) 0 22.0 1271.23153331 (17.052961248403161, 8)
loss 455.4136047363281
Current State,action,reward,Response time,Next State:  (8, 17.052961248403161) 3 21.0 1367.70030431 (17.215992726625572, 9)
loss 554.9468383789062
############ Running episode number: 51  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 560.4835815429688
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 526.3456420898438
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 2 19.0 990.920419923 (11.61852219546234, 11)
loss 528.2001953125
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 1 20.0 982.049698353 (11.469111876584304, 10)
loss 531.1780395507812
Current State,action,reward,Response time,Next State:  (10, 11.469111876584304) 3 19.0 986.02903554 (11.336751742492702, 11)
loss 412.4247131347656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 428.0778503417969
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 0 21.0 962.898956888 (11.027107764209074, 9)
loss 409.8846740722656
Current State,action,reward,Response time,Next State:  (9, 11.027107764209074) 0 23.0 1000.33221268 (10.995673623987257, 7)
loss 421.0245361328125
Current State,action,reward,Response time,Next State:  (7, 10.995673623987257) 3 22.0 1061.04358539 (10.931193889570471, 8)
loss 612.2860717773438
Current State,action,reward,Response time,Next State:  (8, 10.931193889570471) 0 24.0 1009.91486598 (10.816918347608043, 6)
loss 413.53515625
Current State,action,reward,Response time,Next State:  (6, 10.816918347608043) 3 23.0 1112.82017919 (10.819208572963639, 7)
loss 647.137451171875
Current State,action,reward,Response time,Next State:  (7, 10.819208572963639) 0 25.0 1050.04686027 (10.768325938188134, 5)
loss 598.4696655273438
Current State,action,reward,Response time,Next State:  (5, 10.768325938188134) 2 25.0 1246.98250365 (10.772009508959538, 5)
loss 612.4757080078125
Current State,action,reward,Response time,Next State:  (5, 10.772009508959538) 3 24.0 1247.32095832 (10.644925616761762, 6)
loss 609.6973876953125
Current State,action,reward,Response time,Next State:  (6, 10.644925616761762) 3 23.0 1101.44473373 (10.58735855349979, 7)
loss 404.0428771972656
Current State,action,reward,Response time,Next State:  (7, 10.58735855349979) 1 24.0 1035.59872498 (10.552868829802469, 6)
loss 408.5364685058594
Current State,action,reward,Response time,Next State:  (6, 10.552868829802469) 0 26.0 1095.35618014 (10.553846649940214, 4)
loss 534.8417358398438
Current State,action,reward,Response time,Next State:  (4, 10.553846649940214) 0 -64.3122464116 1384.03318082 (10.489125480251131, 2)
loss 409.087890625
Current State,action,reward,Response time,Next State:  (2, 10.489125480251131) 3 27.0 2923.12246412 (10.448897752470936, 3)
loss 595.8952026367188
Current State,action,reward,Response time,Next State:  (3, 10.448897752470936) 1 -62.571500352 1955.42088812 (10.433149880183072, 2)
loss 600.5977172851562
Action -2 not possible so Scaled down by 0
Current State,action,reward,Response time,Next State:  (2, 10.433149880183072) 1 -62.8421061381 2905.71500352 (10.44185150623065, 2)
loss 425.9264221191406
Action -2 not possible so Scaled down by 0
Current State,action,reward,Response time,Next State:  (2, 10.44185150623065) 1 -60.636966392 2908.42106138 (10.370942817486826, 2)
loss 447.8694763183594
Current State,action,reward,Response time,Next State:  (2, 10.370942817486826) 3 27.0 2886.36966392 (10.42733414151318, 3)
loss 585.5144653320312
Current State,action,reward,Response time,Next State:  (3, 10.42733414151318) 3 26.0 1951.17994623 (10.388469398680568, 4)
loss 464.5988464355469
Current State,action,reward,Response time,Next State:  (4, 10.388469398680568) 3 25.0 1365.24559907 (10.344006106602812, 5)
loss 467.85150146484375
Current State,action,reward,Response time,Next State:  (5, 10.344006106602812) 1 26.0 1207.99505511 (10.319026962956018, 4)
loss 458.3487548828125
Current State,action,reward,Response time,Next State:  (4, 10.319026962956018) 1 27.0 1357.35663352 (10.30224719189987, 3)
loss 727.4360961914062
Current State,action,reward,Response time,Next State:  (3, 10.30224719189987) 3 26.0 1926.57894199 (10.278181486298042, 4)
loss 546.8308715820312
Current State,action,reward,Response time,Next State:  (4, 10.278181486298042) 3 25.0 1352.71640806 (10.268274366284802, 5)
loss 455.0423889160156
Current State,action,reward,Response time,Next State:  (5, 10.268274366284802) 2 25.0 1201.03665461 (10.335411397720526, 5)
loss 550.1332397460938
Current State,action,reward,Response time,Next State:  (5, 10.335411397720526) 4 23.0 1207.20535417 (10.305649118067803, 7)
loss 417.33917236328125
Current State,action,reward,Response time,Next State:  (7, 10.305649118067803) 3 22.0 1018.04351387 (10.24826025489064, 8)
loss 409.0402526855469
Current State,action,reward,Response time,Next State:  (8, 10.24826025489064) 3 21.0 970.000949704 (10.276491935146446, 9)
loss 416.06976318359375
Current State,action,reward,Response time,Next State:  (9, 10.276491935146446) 2 21.0 961.041178317 (10.236991269871366, 9)
loss 540.6162109375
Current State,action,reward,Response time,Next State:  (9, 10.236991269871366) 0 23.0 958.973513426 (10.236272697871373, 7)
loss 472.7205810546875
Current State,action,reward,Response time,Next State:  (7, 10.236272697871373) 3 22.0 1013.72020216 (10.369891240151098, 8)
loss 609.303955078125
Current State,action,reward,Response time,Next State:  (8, 10.369891240151098) 3 21.0 977.109647703 (10.316955310454549, 9)
loss 484.00970458984375
Current State,action,reward,Response time,Next State:  (9, 10.316955310454549) 3 20.0 963.159236328 (10.333617326102203, 10)
loss 979.1377563476562
Current State,action,reward,Response time,Next State:  (10, 10.333617326102203) 3 19.0 925.797758139 (10.390165524255663, 11)
loss 420.0257263183594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 413.81671142578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 405.0879821777344
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 1 20.0 925.376677007 (10.655373370049301, 10)
loss 597.8950805664062
Current State,action,reward,Response time,Next State:  (10, 10.655373370049301) 1 21.0 942.865015335 (10.624473674922116, 9)
loss 585.1972045898438
Current State,action,reward,Response time,Next State:  (9, 10.624473674922116) 3 20.0 979.256305105 (10.771376986314287, 10)
loss 618.10986328125
Current State,action,reward,Response time,Next State:  (10, 10.771376986314287) 3 19.0 949.018321829 (10.924797168745895, 11)
loss 409.0775146484375
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 2 19.0 945.391786838 (11.039747673816453, 11)
loss 615.655517578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 420.1097106933594
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 0 21.0 963.716106332 (11.670334358779868, 9)
loss 411.2989807128906
Current State,action,reward,Response time,Next State:  (9, 11.670334358779868) 3 20.0 1034.00195058 (11.819721938468785, 10)
loss 418.3976135253906
Current State,action,reward,Response time,Next State:  (10, 11.819721938468785) 1 21.0 1004.62682792 (12.19918626616789, 9)
loss 563.7454223632812
Current State,action,reward,Response time,Next State:  (9, 12.19918626616789) 3 20.0 1061.68473805 (12.501496275411796, 10)
loss 606.2392578125
Current State,action,reward,Response time,Next State:  (10, 12.501496275411796) 1 21.0 1040.79092857 (13.168618569876575, 9)
loss 548.0264282226562
Current State,action,reward,Response time,Next State:  (9, 13.168618569876575) 3 20.0 1112.429735 (13.649658108197247, 10)
loss 424.3626403808594
Current State,action,reward,Response time,Next State:  (10, 13.649658108197247) 3 19.0 1101.69413046 (14.283719188889453, 11)
loss 567.9940185546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 454.93853759765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 595.8887329101562
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 414.92529296875
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 0 21.0 1204.9600972 (16.466876895473597, 9)
loss 418.5561828613281
Current State,action,reward,Response time,Next State:  (9, 16.466876895473597) 2 21.0 1285.07728144 (16.871606159345866, 9)
loss 540.80078125
Current State,action,reward,Response time,Next State:  (9, 16.871606159345866) 4 19.0 1306.26286107 (17.534967586021782, 11)
loss 767.3025512695312
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 4 19.0 1294.6873044 (17.669285735563751, 11)
loss 412.3446044921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 550.7890014648438
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 1 20.0 1316.32685758 (18.385807405229915, 10)
loss 638.0381469726562
Current State,action,reward,Response time,Next State:  (10, 18.385807405229915) 3 19.0 1352.9188695 (18.671267839956315, 11)
loss 418.70477294921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 634.7318725585938
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 515.2509765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 600.3426513671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 446.88177490234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 538.884765625
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 2 19.0 1379.54110953 (19.385636054792762, 11)
loss 631.5791015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 414.6566467285156
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 2 19.0 1383.93777195 (19.25591252280865, 11)
loss 408.1888122558594
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 0 21.0 1385.62570908 (19.08360399753829, 9)
loss 530.1605834960938
Current State,action,reward,Response time,Next State:  (9, 19.08360399753829) 1 22.0 1422.05003169 (18.668181536495972, 8)
loss 818.5513305664062
Current State,action,reward,Response time,Next State:  (8, 18.668181536495972) 3 21.0 1462.10152292 (18.375894992990247, 9)
loss 423.3813171386719
Current State,action,reward,Response time,Next State:  (9, 18.375894992990247) 3 20.0 1385.00495784 (17.82724819986867, 10)
loss 415.12237548828125
Current State,action,reward,Response time,Next State:  (10, 17.82724819986867) 1 21.0 1323.29060362 (17.229782241685768, 9)
loss 411.5430908203125
Current State,action,reward,Response time,Next State:  (9, 17.229782241685768) 3 20.0 1325.01161138 (16.84211602880065, 10)
loss 537.67919921875
Current State,action,reward,Response time,Next State:  (10, 16.84211602880065) 2 20.0 1271.03516211 (16.237094554670044, 10)
loss 534.8822631835938
Current State,action,reward,Response time,Next State:  (10, 16.237094554670044) 3 19.0 1238.94234737 (15.950694610794756, 11)
loss 523.8263549804688
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 455.2059326171875
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 2 19.0 1204.52470225 (15.550833128512703, 11)
loss 494.62371826171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 641.279052734375
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 2 19.0 1184.33851965 (15.750501603468638, 11)
loss 569.8374633789062
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 458.337158203125
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 2 19.0 1203.91462651 (15.829956988360925, 11)
loss 413.58404541015625
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 2 19.0 1204.59090422 (15.892373986997768, 11)
loss 452.40838623046875
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 4 19.0 1207.88915169 (15.954793861767499, 11)
loss 413.3500061035156
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 4 19.0 1211.18755114 (16.004586266677634, 11)
loss 419.4972229003906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 560.0108032226562
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 0 21.0 1214.51137704 (15.947547279389703, 9)
loss 488.9864501953125
Current State,action,reward,Response time,Next State:  (9, 15.947547279389703) 0 23.0 1257.89293893 (16.11465619633363, 7)
loss 440.55047607421875
Current State,action,reward,Response time,Next State:  (7, 16.11465619633363) 0 25.0 1380.04184534 (16.147078378791146, 5)
loss 415.65020751953125
Current State,action,reward,Response time,Next State:  (5, 16.147078378791146) 3 24.0 1741.1942241 (16.229253414601111, 6)
loss 641.9957275390625
Current State,action,reward,Response time,Next State:  (6, 16.229253414601111) 3 23.0 1470.78718189 (16.295120821876548, 7)
loss 444.0432434082031
Current State,action,reward,Response time,Next State:  (7, 16.295120821876548) 3 22.0 1391.28781087 (16.667936385136993, 8)
loss 439.5550842285156
Current State,action,reward,Response time,Next State:  (8, 16.667936385136993) 3 21.0 1345.1976051 (16.836383524612351, 9)
loss 522.2811889648438
Current State,action,reward,Response time,Next State:  (9, 16.836383524612351) 4 19.0 1304.41912996 (16.845818065953559, 11)
loss 512.8972778320312
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 631.5297241210938
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 418.6821594238281
############ Running episode number: 52  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 478.2455749511719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 632.2002563476562
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 409.0686340332031
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 4 19.0 982.049698353 (11.469111876584304, 11)
loss 798.5817260742188
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 2 19.0 974.154538113 (11.336751742492702, 11)
loss 444.4208679199219
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 2 19.0 967.160346038 (11.25610796929319, 11)
loss 638.4302978515625
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 4 19.0 962.898956888 (11.027107764209074, 11)
loss 606.3272094726562
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 436.2784423828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 406.4881286621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 408.39495849609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 444.8843994140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 538.5339965820312
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 534.7620239257812
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 2 19.0 937.318160694 (10.644925616761762, 11)
loss 537.9345092773438
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 0 21.0 930.602776506 (10.58735855349979, 9)
loss 718.2657470703125
Current State,action,reward,Response time,Next State:  (9, 10.58735855349979) 0 23.0 977.313511661 (10.552868829802469, 7)
loss 689.9442749023438
Current State,action,reward,Response time,Next State:  (7, 10.552868829802469) 2 23.0 1033.44943815 (10.553846649940214, 7)
loss 622.5780639648438
Current State,action,reward,Response time,Next State:  (7, 10.553846649940214) 3 22.0 1033.5103727 (10.489125480251131, 8)
loss 419.8600158691406
Current State,action,reward,Response time,Next State:  (8, 10.489125480251131) 3 21.0 984.078268423 (10.448897752470936, 9)
loss 495.86810302734375
Current State,action,reward,Response time,Next State:  (9, 10.448897752470936) 3 20.0 970.065772031 (10.433149880183072, 10)
loss 441.13287353515625
Current State,action,reward,Response time,Next State:  (10, 10.433149880183072) 3 19.0 931.077372094 (10.44185150623065, 11)
loss 531.6189575195312
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 404.0736999511719
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 2 19.0 916.124940439 (10.42733414151318, 11)
loss 630.2537231445312
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 2 19.0 919.104778396 (10.388469398680568, 11)
loss 631.2420654296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 421.8013610839844
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 4 19.0 914.701547126 (10.319026962956018, 11)
loss 637.9800415039062
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 405.2689514160156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 589.6083984375
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 2 19.0 911.223233653 (10.268274366284802, 11)
loss 464.0859375
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 4 19.0 910.69972028 (10.335411397720526, 11)
loss 499.46759033203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 414.7054138183594
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 4 19.0 912.67468196 (10.24826025489064, 11)
loss 714.055908203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 555.8383178710938
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 2 19.0 911.133954163 (10.236991269871366, 11)
loss 395.24462890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 553.272705078125
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 1 20.0 909.008683798 (10.369891240151098, 10)
loss 514.2830810546875
Current State,action,reward,Response time,Next State:  (10, 10.369891240151098) 2 20.0 927.721874973 (10.316955310454549, 10)
loss 405.7245788574219
Current State,action,reward,Response time,Next State:  (10, 10.316955310454549) 2 20.0 924.913936648 (10.333617326102203, 10)
loss 419.7372131347656
Current State,action,reward,Response time,Next State:  (10, 10.333617326102203) 0 22.0 925.797758139 (10.390165524255663, 8)
loss 412.54052734375
Current State,action,reward,Response time,Next State:  (8, 10.390165524255663) 2 22.0 978.294574081 (10.425974763084863, 8)
loss 588.0536499023438
Current State,action,reward,Response time,Next State:  (8, 10.425974763084863) 2 22.0 980.387437704 (10.546025383098053, 8)
loss 437.99566650390625
Current State,action,reward,Response time,Next State:  (8, 10.546025383098053) 3 21.0 987.40377158 (10.655373370049301, 9)
loss 593.90478515625
Current State,action,reward,Response time,Next State:  (9, 10.655373370049301) 3 20.0 980.873751654 (10.624473674922116, 10)
loss 447.75909423828125
Current State,action,reward,Response time,Next State:  (10, 10.624473674922116) 3 19.0 941.225969064 (10.771376986314287, 11)
loss 566.0983276367188
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 586.9518432617188
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 2 19.0 945.391786838 (11.039747673816453, 11)
loss 792.3104248046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 414.03924560546875
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 0 21.0 963.716106332 (11.670334358779868, 9)
loss 434.73138427734375
Current State,action,reward,Response time,Next State:  (9, 11.670334358779868) 3 20.0 1034.00195058 (11.819721938468785, 10)
loss 398.79290771484375
Current State,action,reward,Response time,Next State:  (10, 11.819721938468785) 3 19.0 1004.62682792 (12.19918626616789, 11)
loss 586.0477905273438
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 407.8972473144531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 418.482421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 412.77178955078125
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 2 19.0 1089.37925646 (14.283719188889453, 11)
loss 600.5107421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 417.50201416015625
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 2 19.0 1143.69153516 (15.353965082180355, 11)
loss 401.01654052734375
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 0 21.0 1179.43847566 (15.836943704090487, 9)
loss 842.2060546875
Current State,action,reward,Response time,Next State:  (9, 15.836943704090487) 3 20.0 1252.10338759 (16.466876895473597, 10)
loss 415.1089172363281
Current State,action,reward,Response time,Next State:  (10, 16.466876895473597) 1 21.0 1251.130943 (16.871606159345866, 9)
loss 405.85662841796875
Current State,action,reward,Response time,Next State:  (9, 16.871606159345866) 3 20.0 1306.26286107 (17.534967586021782, 10)
loss 407.0901184082031
Current State,action,reward,Response time,Next State:  (10, 17.534967586021782) 3 19.0 1307.78684385 (17.669285735563751, 11)
loss 396.52850341796875
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 2 19.0 1301.78496219 (17.944480812078613, 11)
loss 407.887451171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 399.57843017578125
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 0 21.0 1339.64749699 (18.671267839956315, 9)
loss 408.6334533691406
Current State,action,reward,Response time,Next State:  (9, 18.671267839956315) 1 22.0 1400.46626871 (19.02839494033929, 8)
loss 489.97320556640625
Current State,action,reward,Response time,Next State:  (8, 19.02839494033929) 3 21.0 1483.15412147 (19.286321916040979, 9)
loss 516.5183715820312
Current State,action,reward,Response time,Next State:  (9, 19.286321916040979) 4 19.0 1432.66131431 (19.340464848017284, 11)
loss 410.4432373046875
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 2 19.0 1390.09363446 (19.213467265587269, 11)
loss 398.58001708984375
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 0 21.0 1383.38281107 (19.140765783401285, 9)
loss 593.5225830078125
Current State,action,reward,Response time,Next State:  (9, 19.140765783401285) 2 21.0 1425.04216908 (19.385636054792762, 9)
loss 517.299072265625
Current State,action,reward,Response time,Next State:  (9, 19.385636054792762) 3 20.0 1437.85991935 (19.223969507401588, 10)
loss 634.728759765625
Current State,action,reward,Response time,Next State:  (10, 19.223969507401588) 1 21.0 1397.37841716 (19.25591252280865, 9)
loss 597.0709228515625
Current State,action,reward,Response time,Next State:  (9, 19.25591252280865) 3 20.0 1431.06953264 (19.08360399753829, 10)
loss 497.3500671386719
Current State,action,reward,Response time,Next State:  (10, 19.08360399753829) 3 19.0 1389.93285614 (18.668181536495972, 11)
loss 406.64813232421875
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 1 20.0 1354.56874896 (18.375894992990247, 10)
loss 687.9725952148438
Current State,action,reward,Response time,Next State:  (10, 18.375894992990247) 1 21.0 1352.39307459 (17.82724819986867, 9)
loss 433.1234130859375
Current State,action,reward,Response time,Next State:  (9, 17.82724819986867) 3 20.0 1356.28600579 (17.229782241685768, 10)
loss 815.6890869140625
Current State,action,reward,Response time,Next State:  (10, 17.229782241685768) 3 19.0 1291.59856437 (16.84211602880065, 11)
loss 586.3792114257812
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 0 21.0 1258.07554888 (16.237094554670044, 9)
loss 498.17962646484375
Current State,action,reward,Response time,Next State:  (9, 16.237094554670044) 4 19.0 1273.04930988 (15.950694610794756, 11)
loss 604.6962280273438
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 433.6176452636719
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 1 20.0 1204.52470225 (15.550833128512703, 10)
loss 519.502685546875
Current State,action,reward,Response time,Next State:  (10, 15.550833128512703) 0 22.0 1202.54023315 (15.446694946204717, 8)
loss 593.3568115234375
Current State,action,reward,Response time,Next State:  (8, 15.446694946204717) 3 21.0 1273.82239956 (15.750501603468638, 9)
loss 482.50604248046875
Current State,action,reward,Response time,Next State:  (9, 15.750501603468638) 3 20.0 1247.57857022 (15.817158911312735, 10)
loss 403.5803527832031
Current State,action,reward,Response time,Next State:  (10, 15.817158911312735) 3 19.0 1216.66724247 (15.829956988360925, 11)
loss 633.4727172851562
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 2 19.0 1204.59090422 (15.892373986997768, 11)
loss 589.6361694335938
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 441.8202819824219
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 4 19.0 1211.18755114 (16.004586266677634, 11)
loss 397.5522766113281
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 0 21.0 1213.81868812 (16.017694914042416, 9)
loss 439.186767578125
Current State,action,reward,Response time,Next State:  (9, 16.017694914042416) 3 20.0 1261.56482143 (15.947547279389703, 10)
loss 408.6449279785156
Current State,action,reward,Response time,Next State:  (10, 15.947547279389703) 3 19.0 1223.58357506 (16.11465619633363, 11)
loss 591.5650634765625
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 4 19.0 1219.63501821 (16.147078378791146, 11)
loss 388.57635498046875
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 1 20.0 1221.34827555 (16.229253414601111, 10)
loss 585.2523803710938
Current State,action,reward,Response time,Next State:  (10, 16.229253414601111) 3 19.0 1238.52642122 (16.295120821876548, 11)
loss 626.6995849609375
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 2 19.0 1229.17115431 (16.667936385136993, 11)
loss 415.2572937011719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 623.126953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 403.48114013671875
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 1 20.0 1258.27117243 (17.052961248403161, 10)
loss 443.28448486328125
Current State,action,reward,Response time,Next State:  (10, 17.052961248403161) 3 19.0 1282.21925533 (17.215992726625572, 11)
loss 513.3734130859375
############ Running episode number: 53  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 0 22.0 1029.06659875 (11.973514343585284, 8)
loss 441.4774475097656
Current State,action,reward,Response time,Next State:  (8, 11.973514343585284) 3 21.0 1070.83307124 (11.786394321941378, 9)
loss 399.322021484375
Current State,action,reward,Response time,Next State:  (9, 11.786394321941378) 4 19.0 1040.0771169 (11.61852219546234, 11)
loss 773.4874267578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 543.8818969726562
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 0 21.0 974.154538113 (11.336751742492702, 9)
loss 404.73431396484375
Current State,action,reward,Response time,Next State:  (9, 11.336751742492702) 3 20.0 1016.54054685 (11.25610796929319, 10)
loss 570.281982421875
Current State,action,reward,Response time,Next State:  (10, 11.25610796929319) 0 22.0 974.730436685 (11.027107764209074, 8)
loss 520.5550537109375
Current State,action,reward,Response time,Next State:  (8, 11.027107764209074) 2 22.0 1015.52053272 (10.995673623987257, 8)
loss 444.9504089355469
Current State,action,reward,Response time,Next State:  (8, 10.995673623987257) 3 21.0 1013.68337084 (10.931193889570471, 9)
loss 577.665771484375
Current State,action,reward,Response time,Next State:  (9, 10.931193889570471) 0 23.0 995.311594677 (10.816918347608043, 7)
loss 404.3333740234375
Current State,action,reward,Response time,Next State:  (7, 10.816918347608043) 1 24.0 1049.90414092 (10.819208572963639, 6)
loss 404.13519287109375
Current State,action,reward,Response time,Next State:  (6, 10.819208572963639) 3 23.0 1112.97165264 (10.768325938188134, 7)
loss 398.2879943847656
Current State,action,reward,Response time,Next State:  (7, 10.768325938188134) 2 23.0 1046.87602081 (10.772009508959538, 7)
loss 416.8605041503906
Current State,action,reward,Response time,Next State:  (7, 10.772009508959538) 2 23.0 1047.1055689 (10.644925616761762, 7)
loss 400.9767150878906
Current State,action,reward,Response time,Next State:  (7, 10.644925616761762) 3 22.0 1039.18611617 (10.58735855349979, 8)
loss 408.44879150390625
Current State,action,reward,Response time,Next State:  (8, 10.58735855349979) 3 21.0 989.819480251 (10.552868829802469, 9)
loss 410.1272277832031
Current State,action,reward,Response time,Next State:  (9, 10.552868829802469) 1 22.0 975.508144832 (10.553846649940214, 8)
loss 404.705322265625
Current State,action,reward,Response time,Next State:  (8, 10.553846649940214) 0 24.0 987.860883917 (10.489125480251131, 6)
loss 444.1692199707031
Current State,action,reward,Response time,Next State:  (6, 10.489125480251131) 3 23.0 1091.14025199 (10.448897752470936, 7)
loss 411.526123046875
Current State,action,reward,Response time,Next State:  (7, 10.448897752470936) 4 21.0 1026.97030049 (10.433149880183072, 9)
loss 439.1308898925781
Current State,action,reward,Response time,Next State:  (9, 10.433149880183072) 4 19.0 969.241448633 (10.44185150623065, 11)
loss 403.1474914550781
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 0 21.0 919.871906942 (10.370942817486826, 9)
loss 673.0863647460938
Current State,action,reward,Response time,Next State:  (9, 10.370942817486826) 3 20.0 965.985215893 (10.42733414151318, 10)
loss 525.265625
Current State,action,reward,Response time,Next State:  (10, 10.42733414151318) 2 20.0 930.768881517 (10.388469398680568, 10)
loss 625.1590576171875
Current State,action,reward,Response time,Next State:  (10, 10.388469398680568) 3 19.0 928.707336523 (10.344006106602812, 11)
loss 405.1907653808594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 437.68194580078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 494.2093811035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 403.1693115234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 393.77508544921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 494.74420166015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 811.8768920898438
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 2 19.0 912.67468196 (10.24826025489064, 11)
loss 525.9513549804688
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 413.4317932128906
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 1 20.0 911.133954163 (10.236991269871366, 10)
loss 397.65185546875
Action +2 not possible so Scaled up by 1
Current State,action,reward,Response time,Next State:  (10, 10.236991269871366) 4 19.0 920.672316722 (10.236272697871373, 11)
loss 400.3287658691406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 511.2320556640625
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 0 21.0 916.069372847 (10.316955310454549, 9)
loss 441.78900146484375
Current State,action,reward,Response time,Next State:  (9, 10.316955310454549) 3 20.0 963.159236328 (10.333617326102203, 10)
loss 429.97125244140625
Current State,action,reward,Response time,Next State:  (10, 10.333617326102203) 2 20.0 925.797758139 (10.390165524255663, 10)
loss 436.2540283203125
Current State,action,reward,Response time,Next State:  (10, 10.390165524255663) 3 19.0 928.797305964 (10.425974763084863, 11)
loss 583.343994140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 436.5529479980469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 579.2434692382812
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 434.1510009765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 400.2080078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 434.05999755859375
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 2 19.0 945.391786838 (11.039747673816453, 11)
loss 443.3013916015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 652.322265625
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 0 21.0 963.716106332 (11.670334358779868, 9)
loss 521.8363647460938
Current State,action,reward,Response time,Next State:  (9, 11.670334358779868) 4 19.0 1034.00195058 (11.819721938468785, 11)
loss 603.02294921875
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 4 19.0 992.681522335 (12.19918626616789, 11)
loss 399.2594299316406
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 2 19.0 1012.73322757 (12.501496275411796, 11)
loss 397.8429870605469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 590.950439453125
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 4 19.0 1063.96010023 (13.649658108197247, 11)
loss 631.4146728515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 481.29351806640625
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 0 21.0 1122.88439768 (14.677479537099185, 9)
loss 478.8367614746094
Current State,action,reward,Response time,Next State:  (9, 14.677479537099185) 3 20.0 1191.41116041 (15.353965082180355, 10)
loss 641.2160034179688
Current State,action,reward,Response time,Next State:  (10, 15.353965082180355) 0 22.0 1192.09754638 (15.836943704090487, 8)
loss 408.75079345703125
Current State,action,reward,Response time,Next State:  (8, 15.836943704090487) 0 24.0 1296.63040821 (16.466876895473597, 6)
loss 619.728759765625
Current State,action,reward,Response time,Next State:  (6, 16.466876895473597) 1 25.0 1486.50338648 (16.871606159345866, 5)
loss 614.6591186523438
Current State,action,reward,Response time,Next State:  (5, 16.871606159345866) 0 -107.904695815 1807.76544576 (17.534967586021782, 3)
loss 435.4270935058594
Current State,action,reward,Response time,Next State:  (3, 17.534967586021782) 3 6.76217022756 3349.04695815 (17.669285735563751, 4)
loss 646.69482421875
Current State,action,reward,Response time,Next State:  (4, 17.669285735563751) 3 25.0 2192.37829772 (17.944480812078613, 5)
loss 639.3240966796875
Current State,action,reward,Response time,Next State:  (5, 17.944480812078613) 3 24.0 1906.34355171 (18.385807405229915, 6)
loss 403.7226257324219
Current State,action,reward,Response time,Next State:  (6, 18.385807405229915) 2 24.0 1613.41973487 (18.671267839956315, 6)
loss 431.80584716796875
Current State,action,reward,Response time,Next State:  (6, 18.671267839956315) 3 23.0 1632.29983282 (19.02839494033929, 7)
loss 398.4872741699219
Current State,action,reward,Response time,Next State:  (7, 19.02839494033929) 3 22.0 1561.61651886 (19.286321916040979, 8)
loss 439.699951171875
Current State,action,reward,Response time,Next State:  (8, 19.286321916040979) 1 23.0 1498.22861069 (19.340464848017284, 7)
loss 477.7276916503906
Current State,action,reward,Response time,Next State:  (7, 19.340464848017284) 3 22.0 1581.06369535 (19.213467265587269, 8)
loss 596.1568603515625
Current State,action,reward,Response time,Next State:  (8, 19.213467265587269) 3 21.0 1493.97063558 (19.140765783401285, 9)
loss 530.209228515625
Current State,action,reward,Response time,Next State:  (9, 19.140765783401285) 3 20.0 1425.04216908 (19.385636054792762, 10)
loss 398.4140319824219
Current State,action,reward,Response time,Next State:  (10, 19.385636054792762) 3 19.0 1405.95387237 (19.223969507401588, 11)
loss 450.0932312011719
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 1 20.0 1383.93777195 (19.25591252280865, 10)
loss 398.128662109375
Current State,action,reward,Response time,Next State:  (10, 19.25591252280865) 3 19.0 1399.0728054 (19.08360399753829, 11)
loss 515.1414184570312
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 0 21.0 1376.52055872 (18.668181536495972, 9)
loss 639.150146484375
Current State,action,reward,Response time,Next State:  (9, 18.668181536495972) 4 19.0 1400.30471596 (18.375894992990247, 11)
loss 497.1636657714844
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 2 19.0 1339.12370397 (17.82724819986867, 11)
loss 408.3529052734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 471.3743896484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 514.5261840820312
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 632.5213623046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 546.0121459960938
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 601.0802001953125
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 4 19.0 1204.52470225 (15.550833128512703, 11)
loss 404.8403015136719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 517.616455078125
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 1 20.0 1184.33851965 (15.750501603468638, 10)
loss 403.07177734375
Current State,action,reward,Response time,Next State:  (10, 15.750501603468638) 2 20.0 1213.1314661 (15.817158911312735, 10)
loss 405.1899108886719
Action +2 not possible so Scaled up by 1
Current State,action,reward,Response time,Next State:  (10, 15.817158911312735) 4 19.0 1216.66724247 (15.829956988360925, 11)
loss 394.7853698730469
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 0 21.0 1204.59090422 (15.892373986997768, 9)
loss 510.3020324707031
Current State,action,reward,Response time,Next State:  (9, 15.892373986997768) 3 20.0 1255.00488935 (15.954793861767499, 10)
loss 399.1725769042969
Current State,action,reward,Response time,Next State:  (10, 15.954793861767499) 3 19.0 1223.96796344 (16.004586266677634, 11)
loss 604.91259765625
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 4 19.0 1213.81868812 (16.017694914042416, 11)
loss 408.8872985839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 401.47760009765625
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 1 20.0 1210.80462626 (16.11465619633363, 10)
loss 402.171142578125
Current State,action,reward,Response time,Next State:  (10, 16.11465619633363) 3 19.0 1232.44771583 (16.147078378791146, 11)
loss 441.7032470703125
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 0 21.0 1221.34827555 (16.229253414601111, 9)
loss 402.1630554199219
Current State,action,reward,Response time,Next State:  (9, 16.229253414601111) 4 19.0 1272.63886489 (16.295120821876548, 11)
loss 569.9999389648438
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 4 19.0 1229.17115431 (16.667936385136993, 11)
loss 399.1336364746094
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 4 19.0 1248.87152463 (16.836383524612351, 11)
loss 515.9041137695312
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 2 19.0 1257.77263112 (16.845818065953559, 11)
loss 521.3187866210938
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 404.8726501464844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 437.5213928222656
############ Running episode number: 54  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 1 21.0 1029.06659875 (11.973514343585284, 9)
loss 588.8460083007812
Current State,action,reward,Response time,Next State:  (9, 11.973514343585284) 3 20.0 1049.87192659 (11.786394321941378, 10)
loss 544.035400390625
Current State,action,reward,Response time,Next State:  (10, 11.786394321941378) 3 19.0 1002.85899476 (11.61852219546234, 11)
loss 402.8363952636719
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 4 19.0 982.049698353 (11.469111876584304, 11)
loss 534.3550415039062
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 0 21.0 974.154538113 (11.336751742492702, 9)
loss 444.7327575683594
Current State,action,reward,Response time,Next State:  (9, 11.336751742492702) 3 20.0 1016.54054685 (11.25610796929319, 10)
loss 550.7079467773438
Current State,action,reward,Response time,Next State:  (10, 11.25610796929319) 3 19.0 974.730436685 (11.027107764209074, 11)
loss 476.636474609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 483.0496520996094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 577.46044921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 406.5335998535156
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 2 19.0 939.691239608 (10.819208572963639, 11)
loss 467.57891845703125
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 0 21.0 939.812260006 (10.768325938188134, 9)
loss 442.9887390136719
Current State,action,reward,Response time,Next State:  (9, 10.768325938188134) 2 21.0 986.786261176 (10.772009508959538, 9)
loss 402.0997619628906
Current State,action,reward,Response time,Next State:  (9, 10.772009508959538) 3 20.0 986.979077927 (10.644925616761762, 10)
loss 508.0777893066406
Current State,action,reward,Response time,Next State:  (10, 10.644925616761762) 3 19.0 942.310823749 (10.58735855349979, 11)
loss 518.2967529296875
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 4 19.0 927.560809977 (10.552868829802469, 11)
loss 498.2556457519531
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 4 19.0 925.738299342 (10.553846649940214, 11)
loss 388.0810852050781
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 1 20.0 925.789969445 (10.489125480251131, 10)
loss 591.3438720703125
Current State,action,reward,Response time,Next State:  (10, 10.489125480251131) 3 19.0 934.046546974 (10.448897752470936, 11)
loss 404.99957275390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 397.9930725097656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 817.5371704101562
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 1 20.0 919.871906942 (10.370942817486826, 10)
loss 629.7406005859375
Current State,action,reward,Response time,Next State:  (10, 10.370942817486826) 3 19.0 927.777654938 (10.42733414151318, 11)
loss 666.8518676757812
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 434.5341796875
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 2 19.0 917.051082408 (10.344006106602812, 11)
loss 593.2089233398438
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 430.0339050292969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 583.7110595703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 427.9672546386719
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 0 21.0 911.223233653 (10.268274366284802, 9)
loss 410.937255859375
Current State,action,reward,Response time,Next State:  (9, 10.268274366284802) 1 22.0 960.611029141 (10.335411397720526, 8)
loss 704.9069213867188
Current State,action,reward,Response time,Next State:  (8, 10.335411397720526) 1 23.0 975.09448038 (10.305649118067803, 7)
loss 398.03582763671875
Current State,action,reward,Response time,Next State:  (7, 10.305649118067803) 4 21.0 1018.04351387 (10.24826025489064, 9)
loss 400.75787353515625
Current State,action,reward,Response time,Next State:  (9, 10.24826025489064) 4 19.0 959.563389179 (10.276491935146446, 11)
loss 897.9114990234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 584.42626953125
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 0 21.0 909.046654676 (10.236272697871373, 9)
loss 588.384033203125
Current State,action,reward,Response time,Next State:  (9, 10.236272697871373) 3 20.0 958.935899728 (10.369891240151098, 10)
loss 617.8307495117188
Current State,action,reward,Response time,Next State:  (10, 10.369891240151098) 3 19.0 927.721874973 (10.316955310454549, 11)
loss 478.4098205566406
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 2 19.0 913.272125304 (10.333617326102203, 11)
loss 437.045166015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 390.6392822265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 520.5518188476562
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 1 20.0 919.032945938 (10.546025383098053, 10)
loss 400.5262145996094
Current State,action,reward,Response time,Next State:  (10, 10.546025383098053) 2 20.0 937.064750655 (10.655373370049301, 10)
loss 602.0726318359375
Current State,action,reward,Response time,Next State:  (10, 10.655373370049301) 0 22.0 942.865015335 (10.624473674922116, 8)
loss 741.8605346679688
Current State,action,reward,Response time,Next State:  (8, 10.624473674922116) 3 21.0 991.988665914 (10.771376986314287, 9)
loss 396.4861145019531
Current State,action,reward,Response time,Next State:  (9, 10.771376986314287) 1 22.0 986.945968488 (10.924797168745895, 8)
loss 484.9151611328125
Current State,action,reward,Response time,Next State:  (8, 10.924797168745895) 2 22.0 1009.54101094 (11.039747673816453, 8)
loss 666.3831176757812
Current State,action,reward,Response time,Next State:  (8, 11.039747673816453) 3 21.0 1016.25926965 (11.271571944085663, 9)
loss 399.6848449707031
Current State,action,reward,Response time,Next State:  (9, 11.271571944085663) 0 23.0 1013.12870607 (11.670334358779868, 7)
loss 391.9271545410156
Current State,action,reward,Response time,Next State:  (7, 11.670334358779868) 0 25.0 1103.08623692 (11.819721938468785, 5)
loss 398.5476989746094
Current State,action,reward,Response time,Next State:  (5, 11.819721938468785) 3 24.0 1343.58710331 (12.19918626616789, 6)
loss 628.5255126953125
Current State,action,reward,Response time,Next State:  (6, 12.19918626616789) 3 23.0 1204.24214357 (12.501496275411796, 7)
loss 782.1594848632812
Current State,action,reward,Response time,Next State:  (7, 12.501496275411796) 3 22.0 1154.88153049 (13.168618569876575, 8)
loss 401.70208740234375
Current State,action,reward,Response time,Next State:  (8, 13.168618569876575) 3 21.0 1140.68069275 (13.649658108197247, 9)
loss 443.1905822753906
Current State,action,reward,Response time,Next State:  (9, 13.649658108197247) 4 19.0 1137.6097809 (14.283719188889453, 11)
loss 514.7880859375
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 1 20.0 1122.88439768 (14.677479537099185, 10)
loss 432.48992919921875
Current State,action,reward,Response time,Next State:  (10, 14.677479537099185) 2 20.0 1156.21398489 (15.353965082180355, 10)
loss 688.3250732421875
Current State,action,reward,Response time,Next State:  (10, 15.353965082180355) 3 19.0 1192.09754638 (15.836943704090487, 11)
loss 406.7181091308594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 473.8760070800781
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 4 19.0 1238.24711194 (16.871606159345866, 11)
loss 393.0631103515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 407.8585510253906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 594.1088256835938
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 1 20.0 1301.78496219 (17.944480812078613, 10)
loss 405.423095703125
Current State,action,reward,Response time,Next State:  (10, 17.944480812078613) 2 20.0 1329.50910109 (18.385807405229915, 10)
loss 444.6572265625
Current State,action,reward,Response time,Next State:  (10, 18.385807405229915) 2 20.0 1352.9188695 (18.671267839956315, 10)
loss 412.69366455078125
Current State,action,reward,Response time,Next State:  (10, 18.671267839956315) 3 19.0 1368.06085906 (19.02839494033929, 11)
loss 404.87176513671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 400.9689025878906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 440.78436279296875
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 0 21.0 1390.09363446 (19.213467265587269, 9)
loss 439.757568359375
Current State,action,reward,Response time,Next State:  (9, 19.213467265587269) 3 20.0 1428.84773289 (19.140765783401285, 10)
loss 404.3716735839844
Current State,action,reward,Response time,Next State:  (10, 19.140765783401285) 3 19.0 1392.96495117 (19.385636054792762, 11)
loss 400.44091796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 525.379150390625
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 1 20.0 1383.93777195 (19.25591252280865, 10)
loss 519.1958618164062
Current State,action,reward,Response time,Next State:  (10, 19.25591252280865) 3 19.0 1399.0728054 (19.08360399753829, 11)
loss 621.7821655273438
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 414.8269958496094
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 1 20.0 1354.56874896 (18.375894992990247, 10)
loss 569.6260375976562
Current State,action,reward,Response time,Next State:  (10, 18.375894992990247) 1 21.0 1352.39307459 (17.82724819986867, 9)
loss 839.0313720703125
Current State,action,reward,Response time,Next State:  (9, 17.82724819986867) 2 21.0 1356.28600579 (17.229782241685768, 9)
loss 543.5221557617188
Current State,action,reward,Response time,Next State:  (9, 17.229782241685768) 1 22.0 1325.01161138 (16.84211602880065, 8)
loss 399.9667053222656
Current State,action,reward,Response time,Next State:  (8, 16.84211602880065) 4 20.0 1355.37749867 (16.237094554670044, 10)
loss 440.33489990234375
Action +2 not possible so Scaled up by 1
Current State,action,reward,Response time,Next State:  (10, 16.237094554670044) 4 19.0 1238.94234737 (15.950694610794756, 11)
loss 405.908203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 557.633544921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 519.45361328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 709.4175415039062
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 0 21.0 1184.33851965 (15.750501603468638, 9)
loss 397.9354553222656
Current State,action,reward,Response time,Next State:  (9, 15.750501603468638) 1 22.0 1247.57857022 (15.817158911312735, 8)
loss 410.690185546875
Current State,action,reward,Response time,Next State:  (8, 15.817158911312735) 3 21.0 1295.47409005 (15.829956988360925, 9)
loss 400.41827392578125
Current State,action,reward,Response time,Next State:  (9, 15.829956988360925) 2 21.0 1251.7376675 (15.892373986997768, 9)
loss 409.6712646484375
Current State,action,reward,Response time,Next State:  (9, 15.892373986997768) 2 21.0 1255.00488935 (15.954793861767499, 9)
loss 430.0827331542969
Current State,action,reward,Response time,Next State:  (9, 15.954793861767499) 3 20.0 1258.27226176 (16.004586266677634, 10)
loss 406.1106872558594
Current State,action,reward,Response time,Next State:  (10, 16.004586266677634) 3 19.0 1226.60915635 (16.017694914042416, 11)
loss 512.1539916992188
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 409.0675354003906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 402.3934020996094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 436.2088317871094
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 2 19.0 1221.34827555 (16.229253414601111, 11)
loss 614.8450317382812
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 0 21.0 1225.69057988 (16.295120821876548, 9)
loss 497.64276123046875
Current State,action,reward,Response time,Next State:  (9, 16.295120821876548) 4 19.0 1276.0866986 (16.667936385136993, 11)
loss 597.9989624023438
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 391.8531188964844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 602.8395385742188
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 1 20.0 1258.27117243 (17.052961248403161, 10)
loss 411.4627990722656
Current State,action,reward,Response time,Next State:  (10, 17.052961248403161) 3 19.0 1282.21925533 (17.215992726625572, 11)
loss 506.7746887207031
############ Running episode number: 55  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 437.3291015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 405.6528625488281
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 1 20.0 990.920419923 (11.61852219546234, 10)
loss 560.1502685546875
Current State,action,reward,Response time,Next State:  (10, 11.61852219546234) 3 19.0 993.95437024 (11.469111876584304, 11)
loss 610.1337280273438
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 400.766845703125
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 1 20.0 967.160346038 (11.25610796929319, 10)
loss 394.9457092285156
Current State,action,reward,Response time,Next State:  (10, 11.25610796929319) 3 19.0 974.730436685 (11.027107764209074, 11)
loss 598.416748046875
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 4 19.0 950.798097136 (10.995673623987257, 11)
loss 678.20703125
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 2 19.0 949.137050055 (10.931193889570471, 11)
loss 517.5517578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 513.2022705078125
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 0 21.0 939.691239608 (10.819208572963639, 9)
loss 413.7576599121094
Current State,action,reward,Response time,Next State:  (9, 10.819208572963639) 3 20.0 989.449716 (10.768325938188134, 10)
loss 405.62884521484375
Current State,action,reward,Response time,Next State:  (10, 10.768325938188134) 3 19.0 948.856481751 (10.772009508959538, 11)
loss 674.4482421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 405.890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 409.9580383300781
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 4 19.0 927.560809977 (10.552868829802469, 11)
loss 410.29193115234375
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 0 21.0 925.738299342 (10.553846649940214, 9)
loss 434.0709228515625
Current State,action,reward,Response time,Next State:  (9, 10.553846649940214) 3 20.0 975.559328891 (10.489125480251131, 10)
loss 415.04534912109375
Current State,action,reward,Response time,Next State:  (10, 10.489125480251131) 3 19.0 934.046546974 (10.448897752470936, 11)
loss 397.8742370605469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 403.0345153808594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 413.9631652832031
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 0 21.0 919.871906942 (10.370942817486826, 9)
loss 526.1539916992188
Current State,action,reward,Response time,Next State:  (9, 10.370942817486826) 0 23.0 965.985215893 (10.42733414151318, 7)
loss 592.688232421875
Current State,action,reward,Response time,Next State:  (7, 10.42733414151318) 2 23.0 1025.62652674 (10.388469398680568, 7)
loss 509.17877197265625
Current State,action,reward,Response time,Next State:  (7, 10.388469398680568) 3 22.0 1023.20460302 (10.344006106602812, 8)
loss 596.2401123046875
Current State,action,reward,Response time,Next State:  (8, 10.344006106602812) 3 21.0 975.596796379 (10.319026962956018, 9)
loss 407.2228088378906
Current State,action,reward,Response time,Next State:  (9, 10.319026962956018) 3 20.0 963.267677113 (10.30224719189987, 10)
loss 511.11529541015625
Current State,action,reward,Response time,Next State:  (10, 10.30224719189987) 0 22.0 924.133757854 (10.278181486298042, 8)
loss 523.8947143554688
Current State,action,reward,Response time,Next State:  (8, 10.278181486298042) 3 21.0 971.749689939 (10.268274366284802, 9)
loss 402.9976806640625
Current State,action,reward,Response time,Next State:  (9, 10.268274366284802) 3 20.0 960.611029141 (10.335411397720526, 10)
loss 445.68524169921875
Current State,action,reward,Response time,Next State:  (10, 10.335411397720526) 3 19.0 925.892923039 (10.305649118067803, 11)
loss 506.1305847167969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 580.9291381835938
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 414.739990234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 429.2730407714844
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 1 20.0 909.046654676 (10.236272697871373, 10)
loss 413.37005615234375
Current State,action,reward,Response time,Next State:  (10, 10.236272697871373) 0 22.0 920.634200723 (10.369891240151098, 8)
loss 403.30010986328125
Current State,action,reward,Response time,Next State:  (8, 10.369891240151098) 2 22.0 977.109647703 (10.316955310454549, 8)
loss 441.9672546386719
Current State,action,reward,Response time,Next State:  (8, 10.316955310454549) 1 23.0 974.015818144 (10.333617326102203, 7)
loss 405.8314208984375
Current State,action,reward,Response time,Next State:  (7, 10.333617326102203) 3 22.0 1019.78640117 (10.390165524255663, 8)
loss 529.0930786132812
Current State,action,reward,Response time,Next State:  (8, 10.390165524255663) 2 22.0 978.294574081 (10.425974763084863, 8)
loss 407.7666931152344
Current State,action,reward,Response time,Next State:  (8, 10.425974763084863) 4 20.0 980.387437704 (10.546025383098053, 10)
loss 549.07568359375
Current State,action,reward,Response time,Next State:  (10, 10.546025383098053) 3 19.0 937.064750655 (10.655373370049301, 11)
loss 600.0333862304688
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 778.9962768554688
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 2 19.0 929.522052234 (10.771376986314287, 11)
loss 593.3651123046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 585.0008544921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 572.6234130859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 811.7532958984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 405.38916015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 412.1828308105469
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 0 21.0 992.681522335 (12.19918626616789, 9)
loss 444.3860168457031
Current State,action,reward,Response time,Next State:  (9, 12.19918626616789) 0 23.0 1061.68473805 (12.501496275411796, 7)
loss 750.5460205078125
Current State,action,reward,Response time,Next State:  (7, 12.501496275411796) 3 22.0 1154.88153049 (13.168618569876575, 8)
loss 700.290771484375
Current State,action,reward,Response time,Next State:  (8, 13.168618569876575) 4 20.0 1140.68069275 (13.649658108197247, 10)
loss 397.60491943359375
Current State,action,reward,Response time,Next State:  (10, 13.649658108197247) 3 19.0 1101.69413046 (14.283719188889453, 11)
loss 750.3405151367188
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 449.8390197753906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 629.0674438476562
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 442.6186828613281
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 2 19.0 1204.9600972 (16.466876895473597, 11)
loss 433.1712341308594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 587.9767456054688
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 436.50421142578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 439.3016662597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 408.8630676269531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 555.823974609375
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 1 20.0 1339.64749699 (18.671267839956315, 10)
loss 438.8308410644531
Current State,action,reward,Response time,Next State:  (10, 18.671267839956315) 3 19.0 1368.06085906 (19.02839494033929, 11)
loss 399.43670654296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 556.49853515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 624.2430419921875
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 4 19.0 1390.09363446 (19.213467265587269, 11)
loss 441.5732116699219
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 2 19.0 1383.38281107 (19.140765783401285, 11)
loss 445.60845947265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 524.9007568359375
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 4 19.0 1392.48057747 (19.223969507401588, 11)
loss 490.0455017089844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 627.636474609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 826.4457397460938
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 1 20.0 1376.52055872 (18.668181536495972, 10)
loss 399.3623352050781
Current State,action,reward,Response time,Next State:  (10, 18.668181536495972) 2 20.0 1367.89714889 (18.375894992990247, 10)
loss 400.36993408203125
Current State,action,reward,Response time,Next State:  (10, 18.375894992990247) 3 19.0 1352.39307459 (17.82724819986867, 11)
loss 434.4275817871094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 437.1789855957031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 410.29638671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 524.5872802734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 486.2435607910156
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 4 19.0 1210.97093797 (15.828704162850809, 11)
loss 607.5071411132812
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 2 19.0 1204.52470225 (15.550833128512703, 11)
loss 411.099609375
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 2 19.0 1189.84140354 (15.446694946204717, 11)
loss 718.3871459960938
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 526.2556762695312
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 424.4432067871094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 712.4771728515625
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 1 20.0 1204.59090422 (15.892373986997768, 10)
loss 399.21868896484375
Current State,action,reward,Response time,Next State:  (10, 15.892373986997768) 3 19.0 1220.65695786 (15.954793861767499, 11)
loss 392.3396301269531
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 4 19.0 1211.18755114 (16.004586266677634, 11)
loss 402.0449523925781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 416.2685852050781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 402.0807189941406
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 1 20.0 1210.80462626 (16.11465619633363, 10)
loss 509.4316711425781
Current State,action,reward,Response time,Next State:  (10, 16.11465619633363) 3 19.0 1232.44771583 (16.147078378791146, 11)
loss 416.9535217285156
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 1 20.0 1221.34827555 (16.229253414601111, 10)
loss 522.2434692382812
Current State,action,reward,Response time,Next State:  (10, 16.229253414601111) 0 22.0 1238.52642122 (16.295120821876548, 8)
loss 403.2676086425781
Current State,action,reward,Response time,Next State:  (8, 16.295120821876548) 3 21.0 1323.40847593 (16.667936385136993, 9)
loss 409.94189453125
Current State,action,reward,Response time,Next State:  (9, 16.667936385136993) 3 20.0 1295.6017535 (16.836383524612351, 10)
loss 402.9359436035156
Action +2 not possible so Scaled up by 1
Current State,action,reward,Response time,Next State:  (10, 16.836383524612351) 4 19.0 1270.73108663 (16.845818065953559, 11)
loss 400.8066101074219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 514.055908203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 508.9688415527344
############ Running episode number: 56  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 508.9861755371094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 407.4513854980469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 585.8190307617188
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 0 21.0 982.049698353 (11.469111876584304, 9)
loss 408.933349609375
Current State,action,reward,Response time,Next State:  (9, 11.469111876584304) 3 20.0 1023.46894667 (11.336751742492702, 10)
loss 518.6614379882812
Action +2 not possible so Scaled up by 1
Current State,action,reward,Response time,Next State:  (10, 11.336751742492702) 4 19.0 979.00811241 (11.25610796929319, 11)
loss 437.3790588378906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 861.6398315429688
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 529.135986328125
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 4 19.0 949.137050055 (10.931193889570471, 11)
loss 712.4403076171875
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 1 20.0 945.729803224 (10.816918347608043, 10)
loss 441.107177734375
Current State,action,reward,Response time,Next State:  (10, 10.816918347608043) 3 19.0 951.434021987 (10.819208572963639, 11)
loss 399.8436279296875
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 0 21.0 939.812260006 (10.768325938188134, 9)
loss 592.637451171875
Current State,action,reward,Response time,Next State:  (9, 10.768325938188134) 3 20.0 986.786261176 (10.772009508959538, 10)
loss 400.3313293457031
Current State,action,reward,Response time,Next State:  (10, 10.772009508959538) 3 19.0 949.051873418 (10.644925616761762, 11)
loss 511.420166015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 440.63385009765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 440.0951843261719
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 4 19.0 925.738299342 (10.553846649940214, 11)
loss 396.2333984375
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 4 19.0 925.789969445 (10.489125480251131, 11)
loss 405.6331481933594
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 0 21.0 922.369964659 (10.448897752470936, 9)
loss 442.4167785644531
Current State,action,reward,Response time,Next State:  (9, 10.448897752470936) 4 19.0 970.065772031 (10.433149880183072, 11)
loss 403.6859130859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 404.1864013671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 570.3722534179688
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 4 19.0 916.124940439 (10.42733414151318, 11)
loss 611.5503540039062
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 565.9420166015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 397.9136657714844
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 4 19.0 914.701547126 (10.319026962956018, 11)
loss 399.95965576171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 627.7403564453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 411.0131530761719
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 1 20.0 911.223233653 (10.268274366284802, 10)
loss 405.8996887207031
Current State,action,reward,Response time,Next State:  (10, 10.268274366284802) 3 19.0 922.331700166 (10.335411397720526, 11)
loss 400.5716247558594
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 1 20.0 914.247384359 (10.305649118067803, 10)
loss 406.735595703125
Current State,action,reward,Response time,Next State:  (10, 10.305649118067803) 3 19.0 924.314209939 (10.24826025489064, 11)
loss 531.7696533203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 701.2681884765625
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 1 20.0 911.133954163 (10.236991269871366, 10)
loss 523.440673828125
Current State,action,reward,Response time,Next State:  (10, 10.236991269871366) 1 21.0 920.672316722 (10.236272697871373, 9)
loss 393.19134521484375
Current State,action,reward,Response time,Next State:  (9, 10.236272697871373) 0 23.0 958.935899728 (10.369891240151098, 7)
loss 407.2311706542969
Current State,action,reward,Response time,Next State:  (7, 10.369891240151098) 4 21.0 1022.04687291 (10.316955310454549, 9)
loss 407.7415771484375
Current State,action,reward,Response time,Next State:  (9, 10.316955310454549) 3 20.0 963.159236328 (10.333617326102203, 10)
loss 404.7093811035156
Current State,action,reward,Response time,Next State:  (10, 10.333617326102203) 1 21.0 925.797758139 (10.390165524255663, 9)
loss 550.950927734375
Current State,action,reward,Response time,Next State:  (9, 10.390165524255663) 4 19.0 966.991429728 (10.425974763084863, 11)
loss 392.8874816894531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 407.2261962890625
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 2 19.0 925.376677007 (10.655373370049301, 11)
loss 398.4033203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 563.7880859375
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 4 19.0 929.522052234 (10.771376986314287, 11)
loss 508.6100769042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 584.9727783203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 510.1712646484375
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 4 19.0 951.466016946 (11.271571944085663, 11)
loss 401.0482482910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 525.8192138671875
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 4 19.0 984.787563682 (11.819721938468785, 11)
loss 436.4036865234375
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 2 19.0 992.681522335 (12.19918626616789, 11)
loss 412.2744445800781
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 1 20.0 1012.73322757 (12.501496275411796, 10)
loss 406.7781982421875
Action +2 not possible so Scaled up by 1
Current State,action,reward,Response time,Next State:  (10, 12.501496275411796) 4 19.0 1040.79092857 (13.168618569876575, 11)
loss 405.3048400878906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 399.3674621582031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 514.59375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 572.4869384765625
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 4 19.0 1143.69153516 (15.353965082180355, 11)
loss 405.3208312988281
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 1 20.0 1179.43847566 (15.836943704090487, 10)
loss 464.71734619140625
Current State,action,reward,Response time,Next State:  (10, 15.836943704090487) 3 19.0 1217.71670884 (16.466876895473597, 11)
loss 635.1475830078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 534.9682006835938
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 428.24005126953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 417.6099548339844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 403.9044189453125
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 0 21.0 1316.32685758 (18.385807405229915, 9)
loss 407.65533447265625
Current State,action,reward,Response time,Next State:  (9, 18.385807405229915) 2 21.0 1385.5238237 (18.671267839956315, 9)
loss 440.3301086425781
Current State,action,reward,Response time,Next State:  (9, 18.671267839956315) 3 20.0 1400.46626871 (19.02839494033929, 10)
loss 429.5644836425781
Current State,action,reward,Response time,Next State:  (10, 19.02839494033929) 3 19.0 1387.00434183 (19.286321916040979, 11)
loss 397.4183044433594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 507.64630126953125
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 1 20.0 1390.09363446 (19.213467265587269, 10)
loss 440.61236572265625
Current State,action,reward,Response time,Next State:  (10, 19.213467265587269) 0 22.0 1396.82133527 (19.140765783401285, 8)
loss 719.5726928710938
Current State,action,reward,Response time,Next State:  (8, 19.140765783401285) 3 21.0 1489.72161235 (19.385636054792762, 9)
loss 525.298095703125
Current State,action,reward,Response time,Next State:  (9, 19.385636054792762) 0 23.0 1437.85991935 (19.223969507401588, 7)
loss 553.9217529296875
Current State,action,reward,Response time,Next State:  (7, 19.223969507401588) 3 22.0 1573.80408654 (19.25591252280865, 8)
loss 517.174072265625
Current State,action,reward,Response time,Next State:  (8, 19.25591252280865) 3 21.0 1496.45133993 (19.08360399753829, 9)
loss 400.3915100097656
Current State,action,reward,Response time,Next State:  (9, 19.08360399753829) 4 19.0 1422.05003169 (18.668181536495972, 11)
loss 426.61236572265625
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 2 19.0 1354.56874896 (18.375894992990247, 11)
loss 517.5582885742188
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 4 19.0 1339.12370397 (17.82724819986867, 11)
loss 558.6187744140625
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 2 19.0 1310.13203606 (17.229782241685768, 11)
loss 436.25079345703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 561.0220947265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 520.1560668945312
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 400.7562561035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 595.5677490234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 556.0097045898438
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 1 20.0 1189.84140354 (15.446694946204717, 10)
loss 495.7009582519531
Current State,action,reward,Response time,Next State:  (10, 15.446694946204717) 0 22.0 1197.01631782 (15.750501603468638, 8)
loss 516.0138549804688
Current State,action,reward,Response time,Next State:  (8, 15.750501603468638) 3 21.0 1291.57831736 (15.817158911312735, 9)
loss 558.0328369140625
Current State,action,reward,Response time,Next State:  (9, 15.817158911312735) 4 19.0 1251.06775133 (15.829956988360925, 11)
loss 435.4134826660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 395.619873046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 387.619873046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 437.0171203613281
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 0 21.0 1213.81868812 (16.017694914042416, 9)
loss 415.06005859375
Current State,action,reward,Response time,Next State:  (9, 16.017694914042416) 2 21.0 1261.56482143 (15.947547279389703, 9)
loss 392.8274230957031
Current State,action,reward,Response time,Next State:  (9, 15.947547279389703) 1 22.0 1257.89293893 (16.11465619633363, 8)
loss 393.0068054199219
Current State,action,reward,Response time,Next State:  (8, 16.11465619633363) 4 20.0 1312.86125789 (16.147078378791146, 10)
loss 428.0459899902344
Current State,action,reward,Response time,Next State:  (10, 16.147078378791146) 1 21.0 1234.16752106 (16.229253414601111, 9)
loss 590.47607421875
Current State,action,reward,Response time,Next State:  (9, 16.229253414601111) 2 21.0 1272.63886489 (16.295120821876548, 9)
loss 407.1062927246094
Current State,action,reward,Response time,Next State:  (9, 16.295120821876548) 0 23.0 1276.0866986 (16.667936385136993, 7)
loss 407.3851623535156
Current State,action,reward,Response time,Next State:  (7, 16.667936385136993) 3 22.0 1414.52045804 (16.836383524612351, 8)
loss 407.6132507324219
Current State,action,reward,Response time,Next State:  (8, 16.836383524612351) 3 21.0 1355.04246364 (16.845818065953559, 9)
loss 406.5572509765625
Current State,action,reward,Response time,Next State:  (9, 16.845818065953559) 3 20.0 1304.91298164 (17.052961248403161, 10)
loss 554.9920654296875
Current State,action,reward,Response time,Next State:  (10, 17.052961248403161) 3 19.0 1282.21925533 (17.215992726625572, 11)
loss 401.5760192871094
############ Running episode number: 57  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 0 22.0 1029.06659875 (11.973514343585284, 8)
loss 440.32147216796875
Current State,action,reward,Response time,Next State:  (8, 11.973514343585284) 3 21.0 1070.83307124 (11.786394321941378, 9)
loss 439.80279541015625
Current State,action,reward,Response time,Next State:  (9, 11.786394321941378) 4 19.0 1040.0771169 (11.61852219546234, 11)
loss 434.08575439453125
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 4 19.0 982.049698353 (11.469111876584304, 11)
loss 416.6059265136719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 515.484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 471.77685546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 416.3348083496094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 394.30181884765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 486.4949035644531
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 2 19.0 945.729803224 (10.816918347608043, 11)
loss 397.26531982421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 439.8226013183594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 406.501220703125
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 0 21.0 937.12351295 (10.772009508959538, 9)
loss 439.7699279785156
Current State,action,reward,Response time,Next State:  (9, 10.772009508959538) 3 20.0 986.979077927 (10.644925616761762, 10)
loss 443.7269287109375
Current State,action,reward,Response time,Next State:  (10, 10.644925616761762) 3 19.0 942.310823749 (10.58735855349979, 11)
loss 522.913330078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 629.5562133789062
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 2 19.0 925.738299342 (10.553846649940214, 11)
loss 472.8324279785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 403.09332275390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 405.5892333984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 514.8977661132812
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 4 19.0 919.412094444 (10.44185150623065, 11)
loss 390.1820068359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 438.8873291015625
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 0 21.0 916.124940439 (10.42733414151318, 9)
loss 518.32275390625
Current State,action,reward,Response time,Next State:  (9, 10.42733414151318) 4 19.0 968.937023414 (10.388469398680568, 11)
loss 521.8895874023438
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 560.2750244140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 391.0353088378906
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 2 19.0 913.381595845 (10.30224719189987, 11)
loss 423.5675964355469
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 1 20.0 912.494916918 (10.278181486298042, 10)
loss 565.2298583984375
Current State,action,reward,Response time,Next State:  (10, 10.278181486298042) 3 19.0 922.857214352 (10.268274366284802, 11)
loss 597.0691528320312
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 392.6851806640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 395.8665771484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 560.4450073242188
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 4 19.0 909.642131904 (10.276491935146446, 11)
loss 387.92913818359375
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 4 19.0 911.133954163 (10.236991269871366, 11)
loss 518.84765625
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 2 19.0 909.046654676 (10.236272697871373, 11)
loss 401.5909729003906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 517.3163452148438
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 0 21.0 916.069372847 (10.316955310454549, 9)
loss 402.10528564453125
Current State,action,reward,Response time,Next State:  (9, 10.316955310454549) 3 20.0 963.159236328 (10.333617326102203, 10)
loss 399.4770812988281
Current State,action,reward,Response time,Next State:  (10, 10.333617326102203) 1 21.0 925.797758139 (10.390165524255663, 9)
loss 506.867919921875
Current State,action,reward,Response time,Next State:  (9, 10.390165524255663) 3 20.0 966.991429728 (10.425974763084863, 10)
loss 393.5359191894531
Current State,action,reward,Response time,Next State:  (10, 10.425974763084863) 3 19.0 930.696774523 (10.546025383098053, 11)
loss 428.2027282714844
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 0 21.0 925.376677007 (10.655373370049301, 9)
loss 515.7830200195312
Current State,action,reward,Response time,Next State:  (9, 10.655373370049301) 2 21.0 980.873751654 (10.624473674922116, 9)
loss 526.5565185546875
Current State,action,reward,Response time,Next State:  (9, 10.624473674922116) 0 23.0 979.256305105 (10.771376986314287, 7)
loss 406.1185302734375
Current State,action,reward,Response time,Next State:  (7, 10.771376986314287) 3 22.0 1047.06615216 (10.924797168745895, 8)
loss 400.743408203125
Current State,action,reward,Response time,Next State:  (8, 10.924797168745895) 3 21.0 1009.54101094 (11.039747673816453, 9)
loss 546.2357788085938
Current State,action,reward,Response time,Next State:  (9, 11.039747673816453) 3 20.0 1000.99384957 (11.271571944085663, 10)
loss 552.98681640625
Current State,action,reward,Response time,Next State:  (10, 11.271571944085663) 3 19.0 975.550709187 (11.670334358779868, 11)
loss 529.4944458007812
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 1 20.0 984.787563682 (11.819721938468785, 10)
loss 519.8348388671875
Current State,action,reward,Response time,Next State:  (10, 11.819721938468785) 3 19.0 1004.62682792 (12.19918626616789, 11)
loss 415.0517578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 394.32464599609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 514.9154663085938
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 2 19.0 1063.96010023 (13.649658108197247, 11)
loss 397.173828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 439.7942810058594
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 2 19.0 1122.88439768 (14.677479537099185, 11)
loss 555.6250610351562
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 0 21.0 1143.69153516 (15.353965082180355, 9)
loss 408.47833251953125
Current State,action,reward,Response time,Next State:  (9, 15.353965082180355) 3 20.0 1226.82184023 (15.836943704090487, 10)
loss 440.20947265625
Current State,action,reward,Response time,Next State:  (10, 15.836943704090487) 3 19.0 1217.71670884 (16.466876895473597, 11)
loss 400.69061279296875
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 0 21.0 1238.24711194 (16.871606159345866, 9)
loss 393.0345458984375
Current State,action,reward,Response time,Next State:  (9, 16.871606159345866) 3 20.0 1306.26286107 (17.534967586021782, 10)
loss 440.6609191894531
Current State,action,reward,Response time,Next State:  (10, 17.534967586021782) 3 19.0 1307.78684385 (17.669285735563751, 11)
loss 401.64990234375
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 2 19.0 1301.78496219 (17.944480812078613, 11)
loss 391.0965881347656
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 2 19.0 1316.32685758 (18.385807405229915, 11)
loss 393.77239990234375
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 4 19.0 1339.64749699 (18.671267839956315, 11)
loss 395.7136535644531
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 1 20.0 1354.73183582 (19.02839494033929, 10)
loss 401.6596374511719
Current State,action,reward,Response time,Next State:  (10, 19.02839494033929) 1 21.0 1387.00434183 (19.286321916040979, 9)
loss 518.3475952148438
Current State,action,reward,Response time,Next State:  (9, 19.286321916040979) 3 20.0 1432.66131431 (19.340464848017284, 10)
loss 407.95172119140625
Current State,action,reward,Response time,Next State:  (10, 19.340464848017284) 3 19.0 1403.55780672 (19.213467265587269, 11)
loss 445.2933044433594
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 2 19.0 1383.38281107 (19.140765783401285, 11)
loss 399.50384521484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 549.9513549804688
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 406.1315002441406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 433.8009338378906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 516.5929565429688
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 0 21.0 1376.52055872 (18.668181536495972, 9)
loss 517.6205444335938
Current State,action,reward,Response time,Next State:  (9, 18.668181536495972) 4 19.0 1400.30471596 (18.375894992990247, 11)
loss 394.45562744140625
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 4 19.0 1339.12370397 (17.82724819986867, 11)
loss 487.6523132324219
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 0 21.0 1310.13203606 (17.229782241685768, 9)
loss 487.93798828125
Current State,action,reward,Response time,Next State:  (9, 17.229782241685768) 4 19.0 1325.01161138 (16.84211602880065, 11)
loss 413.8641052246094
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 2 19.0 1258.07554888 (16.237094554670044, 11)
loss 395.49969482421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 512.60205078125
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 0 21.0 1210.97093797 (15.828704162850809, 9)
loss 409.9083557128906
Current State,action,reward,Response time,Next State:  (9, 15.828704162850809) 3 20.0 1251.67208827 (15.550833128512703, 10)
loss 443.6924743652344
Action +2 not possible so Scaled up by 1
Current State,action,reward,Response time,Next State:  (10, 15.550833128512703) 4 19.0 1202.54023315 (15.446694946204717, 11)
loss 525.4627075195312
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 1 20.0 1184.33851965 (15.750501603468638, 10)
loss 404.18505859375
Current State,action,reward,Response time,Next State:  (10, 15.750501603468638) 3 19.0 1213.1314661 (15.817158911312735, 11)
loss 413.45574951171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 444.3273010253906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 438.5122985839844
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 2 19.0 1207.88915169 (15.954793861767499, 11)
loss 405.6445617675781
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 1 20.0 1211.18755114 (16.004586266677634, 10)
loss 476.28826904296875
Current State,action,reward,Response time,Next State:  (10, 16.004586266677634) 3 19.0 1226.60915635 (16.017694914042416, 11)
loss 394.615234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 401.6407775878906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 400.3536682128906
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 1 20.0 1219.63501821 (16.147078378791146, 10)
loss 398.29736328125
Current State,action,reward,Response time,Next State:  (10, 16.147078378791146) 0 22.0 1234.16752106 (16.229253414601111, 8)
loss 397.8733215332031
Current State,action,reward,Response time,Next State:  (8, 16.229253414601111) 2 22.0 1319.55886882 (16.295120821876548, 8)
loss 402.42803955078125
Current State,action,reward,Response time,Next State:  (8, 16.295120821876548) 3 21.0 1323.40847593 (16.667936385136993, 9)
loss 408.7569580078125
Current State,action,reward,Response time,Next State:  (9, 16.667936385136993) 3 20.0 1295.6017535 (16.836383524612351, 10)
loss 428.1772155761719
Current State,action,reward,Response time,Next State:  (10, 16.836383524612351) 3 19.0 1270.73108663 (16.845818065953559, 11)
loss 391.1863708496094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 392.5318298339844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 438.98553466796875
############ Running episode number: 58  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 440.7760925292969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 541.1107177734375
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 1 20.0 990.920419923 (11.61852219546234, 10)
loss 553.7416381835938
Current State,action,reward,Response time,Next State:  (10, 11.61852219546234) 3 19.0 993.95437024 (11.469111876584304, 11)
loss 518.54150390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 442.22174072265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 393.9460754394531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 397.9981384277344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 445.33441162109375
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 4 19.0 949.137050055 (10.931193889570471, 11)
loss 403.20404052734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 514.964599609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 516.0556030273438
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 512.6361694335938
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 0 21.0 937.12351295 (10.772009508959538, 9)
loss 391.010009765625
Current State,action,reward,Response time,Next State:  (9, 10.772009508959538) 3 20.0 986.979077927 (10.644925616761762, 10)
loss 396.0534362792969
Current State,action,reward,Response time,Next State:  (10, 10.644925616761762) 3 19.0 942.310823749 (10.58735855349979, 11)
loss 537.2364501953125
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 0 21.0 927.560809977 (10.552868829802469, 9)
loss 395.54608154296875
Current State,action,reward,Response time,Next State:  (9, 10.552868829802469) 2 21.0 975.508144832 (10.553846649940214, 9)
loss 404.60797119140625
Current State,action,reward,Response time,Next State:  (9, 10.553846649940214) 3 20.0 975.559328891 (10.489125480251131, 10)
loss 512.2507934570312
Current State,action,reward,Response time,Next State:  (10, 10.489125480251131) 3 19.0 934.046546974 (10.448897752470936, 11)
loss 547.1342163085938
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 2 19.0 920.244245637 (10.433149880183072, 11)
loss 629.90283203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 635.8773193359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 436.1465148925781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 505.4519958496094
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 0 21.0 919.104778396 (10.388469398680568, 9)
loss 414.549560546875
Current State,action,reward,Response time,Next State:  (9, 10.388469398680568) 3 20.0 966.902645924 (10.344006106602812, 10)
loss 409.057373046875
Current State,action,reward,Response time,Next State:  (10, 10.344006106602812) 1 21.0 926.348821567 (10.319026962956018, 9)
loss 546.5491943359375
Current State,action,reward,Response time,Next State:  (9, 10.319026962956018) 3 20.0 963.267677113 (10.30224719189987, 10)
loss 401.41229248046875
Current State,action,reward,Response time,Next State:  (10, 10.30224719189987) 3 19.0 924.133757854 (10.278181486298042, 11)
loss 396.68707275390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 437.02294921875
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 0 21.0 910.69972028 (10.335411397720526, 9)
loss 518.4987182617188
Current State,action,reward,Response time,Next State:  (9, 10.335411397720526) 3 20.0 964.125321415 (10.305649118067803, 10)
loss 401.6424560546875
Current State,action,reward,Response time,Next State:  (10, 10.305649118067803) 0 22.0 924.314209939 (10.24826025489064, 8)
loss 430.43927001953125
Current State,action,reward,Response time,Next State:  (8, 10.24826025489064) 3 21.0 970.000949704 (10.276491935146446, 9)
loss 513.9314575195312
Current State,action,reward,Response time,Next State:  (9, 10.276491935146446) 2 21.0 961.041178317 (10.236991269871366, 9)
loss 432.3937072753906
Current State,action,reward,Response time,Next State:  (9, 10.236991269871366) 2 21.0 958.973513426 (10.236272697871373, 9)
loss 532.7478637695312
Current State,action,reward,Response time,Next State:  (9, 10.236272697871373) 2 21.0 958.935899728 (10.369891240151098, 9)
loss 448.8876037597656
Current State,action,reward,Response time,Next State:  (9, 10.369891240151098) 1 22.0 965.930171009 (10.316955310454549, 8)
loss 491.362548828125
Current State,action,reward,Response time,Next State:  (8, 10.316955310454549) 3 21.0 974.015818144 (10.333617326102203, 9)
loss 397.31634521484375
Current State,action,reward,Response time,Next State:  (9, 10.333617326102203) 3 20.0 964.03141062 (10.390165524255663, 10)
loss 638.357177734375
Current State,action,reward,Response time,Next State:  (10, 10.390165524255663) 2 20.0 928.797305964 (10.425974763084863, 10)
loss 427.2409973144531
Current State,action,reward,Response time,Next State:  (10, 10.425974763084863) 0 22.0 930.696774523 (10.546025383098053, 8)
loss 447.5404357910156
Current State,action,reward,Response time,Next State:  (8, 10.546025383098053) 3 21.0 987.40377158 (10.655373370049301, 9)
loss 441.70159912109375
Current State,action,reward,Response time,Next State:  (9, 10.655373370049301) 3 20.0 980.873751654 (10.624473674922116, 10)
loss 430.47418212890625
Current State,action,reward,Response time,Next State:  (10, 10.624473674922116) 3 19.0 941.225969064 (10.771376986314287, 11)
loss 409.1968994140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 396.4270935058594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 434.1820068359375
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 1 20.0 951.466016946 (11.271571944085663, 10)
loss 434.3335266113281
Current State,action,reward,Response time,Next State:  (10, 11.271571944085663) 3 19.0 975.550709187 (11.670334358779868, 11)
loss 487.9744873046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 397.4413757324219
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 1 20.0 992.681522335 (12.19918626616789, 10)
loss 433.7856750488281
Action +2 not possible so Scaled up by 1
Current State,action,reward,Response time,Next State:  (10, 12.19918626616789) 4 19.0 1024.75516863 (12.501496275411796, 11)
loss 518.3291015625
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 4 19.0 1028.70793389 (13.168618569876575, 11)
loss 502.7933044433594
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 1 20.0 1063.96010023 (13.649658108197247, 10)
loss 445.1534729003906
Action +2 not possible so Scaled up by 1
Current State,action,reward,Response time,Next State:  (10, 13.649658108197247) 4 19.0 1101.69413046 (14.283719188889453, 11)
loss 516.03564453125
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 4 19.0 1122.88439768 (14.677479537099185, 11)
loss 399.1781311035156
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 2 19.0 1143.69153516 (15.353965082180355, 11)
loss 503.4639587402344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 554.8882446289062
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 428.0970458984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 569.7671508789062
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 430.1047058105469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 747.1532592773438
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 556.35205078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 521.10205078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 395.78302001953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 399.6454772949219
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 4 19.0 1373.60319427 (19.286321916040979, 11)
loss 399.9666442871094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 508.7103576660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 406.80694580078125
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 2 19.0 1383.38281107 (19.140765783401285, 11)
loss 602.15673828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 410.760498046875
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 4 19.0 1392.48057747 (19.223969507401588, 11)
loss 400.25213623046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 447.2480773925781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 435.26239013671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 395.12432861328125
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 2 19.0 1354.56874896 (18.375894992990247, 11)
loss 595.53662109375
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 2 19.0 1339.12370397 (17.82724819986867, 11)
loss 398.9245910644531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 489.6703186035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 436.4519348144531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 559.2603759765625
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 4 19.0 1226.10492247 (15.950694610794756, 11)
loss 436.4991455078125
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 2 19.0 1210.97093797 (15.828704162850809, 11)
loss 389.5978698730469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 406.33734130859375
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 0 21.0 1189.84140354 (15.446694946204717, 9)
loss 394.7357177734375
Current State,action,reward,Response time,Next State:  (9, 15.446694946204717) 4 19.0 1231.67579099 (15.750501603468638, 11)
loss 634.8306274414062
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 405.8768005371094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 430.26751708984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 436.5709228515625
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 4 19.0 1207.88915169 (15.954793861767499, 11)
loss 627.0574951171875
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 0 21.0 1211.18755114 (16.004586266677634, 9)
loss 395.5115966796875
Current State,action,reward,Response time,Next State:  (9, 16.004586266677634) 3 20.0 1260.87864843 (16.017694914042416, 10)
loss 516.130126953125
Current State,action,reward,Response time,Next State:  (10, 16.017694914042416) 3 19.0 1227.30449265 (15.947547279389703, 11)
loss 561.2435302734375
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 2 19.0 1210.80462626 (16.11465619633363, 11)
loss 441.7939758300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 396.317138671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 549.3209228515625
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 0 21.0 1225.69057988 (16.295120821876548, 9)
loss 553.9149780273438
Current State,action,reward,Response time,Next State:  (9, 16.295120821876548) 3 20.0 1276.0866986 (16.667936385136993, 10)
loss 513.7723388671875
Current State,action,reward,Response time,Next State:  (10, 16.667936385136993) 0 22.0 1261.79596106 (16.836383524612351, 8)
loss 395.854736328125
Current State,action,reward,Response time,Next State:  (8, 16.836383524612351) 3 21.0 1355.04246364 (16.845818065953559, 9)
loss 516.7889404296875
Current State,action,reward,Response time,Next State:  (9, 16.845818065953559) 4 19.0 1304.91298164 (17.052961248403161, 11)
loss 398.8269348144531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 437.0745544433594
############ Running episode number: 59  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 2 20.0 1029.06659875 (11.973514343585284, 10)
loss 393.80511474609375
Current State,action,reward,Response time,Next State:  (10, 11.973514343585284) 1 21.0 1012.7846064 (11.786394321941378, 9)
loss 482.8708801269531
Current State,action,reward,Response time,Next State:  (9, 11.786394321941378) 2 21.0 1040.0771169 (11.61852219546234, 9)
loss 551.5993041992188
Current State,action,reward,Response time,Next State:  (9, 11.61852219546234) 3 20.0 1031.28983953 (11.469111876584304, 10)
loss 519.7971801757812
Current State,action,reward,Response time,Next State:  (10, 11.469111876584304) 3 19.0 986.02903554 (11.336751742492702, 11)
loss 485.8033752441406
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 0 21.0 967.160346038 (11.25610796929319, 9)
loss 444.86053466796875
Current State,action,reward,Response time,Next State:  (9, 11.25610796929319) 1 22.0 1012.3192433 (11.027107764209074, 8)
loss 408.3107604980469
Current State,action,reward,Response time,Next State:  (8, 11.027107764209074) 3 21.0 1015.52053272 (10.995673623987257, 9)
loss 541.3595581054688
Current State,action,reward,Response time,Next State:  (9, 10.995673623987257) 3 20.0 998.686790566 (10.931193889570471, 10)
loss 399.9142761230469
Current State,action,reward,Response time,Next State:  (10, 10.931193889570471) 1 21.0 957.495664348 (10.816918347608043, 9)
loss 511.702880859375
Current State,action,reward,Response time,Next State:  (9, 10.816918347608043) 4 19.0 989.329834005 (10.819208572963639, 11)
loss 600.6400146484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 396.29437255859375
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 2 19.0 937.12351295 (10.772009508959538, 11)
loss 548.1934814453125
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 2 19.0 937.318160694 (10.644925616761762, 11)
loss 394.4884338378906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 508.7474670410156
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 4 19.0 927.560809977 (10.552868829802469, 11)
loss 388.3653564453125
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 0 21.0 925.738299342 (10.553846649940214, 9)
loss 512.1548461914062
Current State,action,reward,Response time,Next State:  (9, 10.553846649940214) 1 22.0 975.559328891 (10.489125480251131, 8)
loss 428.79736328125
Current State,action,reward,Response time,Next State:  (8, 10.489125480251131) 3 21.0 984.078268423 (10.448897752470936, 9)
loss 466.4974670410156
Current State,action,reward,Response time,Next State:  (9, 10.448897752470936) 4 19.0 970.065772031 (10.433149880183072, 11)
loss 429.9070129394531
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 2 19.0 919.412094444 (10.44185150623065, 11)
loss 467.606201171875
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 4 19.0 919.871906942 (10.370942817486826, 11)
loss 386.046630859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 517.57421875
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 2 19.0 919.104778396 (10.388469398680568, 11)
loss 435.2456359863281
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 1 20.0 917.051082408 (10.344006106602812, 10)
loss 481.4356689453125
Current State,action,reward,Response time,Next State:  (10, 10.344006106602812) 2 20.0 926.348821567 (10.319026962956018, 10)
loss 507.69635009765625
Action +2 not possible so Scaled up by 1
Current State,action,reward,Response time,Next State:  (10, 10.319026962956018) 4 19.0 925.023825574 (10.30224719189987, 11)
loss 427.8739013671875
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 4 19.0 912.494916918 (10.278181486298042, 11)
loss 393.54193115234375
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 4 19.0 911.223233653 (10.268274366284802, 11)
loss 532.1356201171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 499.2276916503906
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 4 19.0 914.247384359 (10.305649118067803, 11)
loss 389.9364318847656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 403.3213195800781
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 0 21.0 909.642131904 (10.276491935146446, 9)
loss 401.73931884765625
Current State,action,reward,Response time,Next State:  (9, 10.276491935146446) 3 20.0 961.041178317 (10.236991269871366, 10)
loss 557.2526245117188
Current State,action,reward,Response time,Next State:  (10, 10.236991269871366) 0 22.0 920.672316722 (10.236272697871373, 8)
loss 666.0690307617188
Current State,action,reward,Response time,Next State:  (8, 10.236272697871373) 0 24.0 969.300339391 (10.369891240151098, 6)
loss 442.49102783203125
Current State,action,reward,Response time,Next State:  (6, 10.369891240151098) 1 25.0 1083.25420594 (10.316955310454549, 5)
loss 394.7323913574219
Current State,action,reward,Response time,Next State:  (5, 10.316955310454549) 3 24.0 1205.50956788 (10.333617326102203, 6)
loss 561.415771484375
Current State,action,reward,Response time,Next State:  (6, 10.333617326102203) 1 25.0 1080.85508169 (10.390165524255663, 5)
loss 439.9434509277344
Current State,action,reward,Response time,Next State:  (5, 10.390165524255663) 3 24.0 1212.23628493 (10.425974763084863, 6)
loss 397.9833984375
Current State,action,reward,Response time,Next State:  (6, 10.425974763084863) 4 22.0 1086.96352001 (10.546025383098053, 8)
loss 432.8578796386719
Current State,action,reward,Response time,Next State:  (8, 10.546025383098053) 0 24.0 987.40377158 (10.655373370049301, 6)
loss 479.09490966796875
Current State,action,reward,Response time,Next State:  (6, 10.655373370049301) 0 26.0 1102.13573879 (10.624473674922116, 4)
loss 410.3341064453125
Current State,action,reward,Response time,Next State:  (4, 10.624473674922116) 3 25.0 1392.05672091 (10.771376986314287, 5)
loss 390.6200256347656
Current State,action,reward,Response time,Next State:  (5, 10.771376986314287) 0 22.0983388322 1247.26284073 (10.924797168745895, 3)
loss 436.4178466796875
Current State,action,reward,Response time,Next State:  (3, 10.924797168745895) 3 26.0 2049.01661168 (11.039747673816453, 4)
loss 677.2676391601562
Current State,action,reward,Response time,Next State:  (4, 11.039747673816453) 4 24.0 1439.23367024 (11.271571944085663, 6)
loss 406.1981201171875
Current State,action,reward,Response time,Next State:  (6, 11.271571944085663) 1 25.0 1142.8905616 (11.670334358779868, 5)
loss 393.9577331542969
Current State,action,reward,Response time,Next State:  (5, 11.670334358779868) 4 23.0 1329.8610407 (11.819721938468785, 7)
loss 402.573486328125
Current State,action,reward,Response time,Next State:  (7, 11.819721938468785) 1 24.0 1112.39558253 (12.19918626616789, 6)
loss 393.6031188964844
Current State,action,reward,Response time,Next State:  (6, 12.19918626616789) 2 24.0 1204.24214357 (12.501496275411796, 6)
loss 412.02203369140625
Current State,action,reward,Response time,Next State:  (6, 12.501496275411796) 1 25.0 1224.23665732 (13.168618569876575, 5)
loss 410.1116638183594
Current State,action,reward,Response time,Next State:  (5, 13.168618569876575) 0 -31.4918364219 1467.5267217 (13.649658108197247, 3)
loss 405.6398010253906
Current State,action,reward,Response time,Next State:  (3, 13.649658108197247) 3 26.0 2584.91836422 (14.283719188889453, 4)
loss 397.9461364746094
Current State,action,reward,Response time,Next State:  (4, 14.283719188889453) 0 -194.562941566 1807.76307539 (14.677479537099185, 2)
loss 441.7432556152344
Current State,action,reward,Response time,Next State:  (2, 14.677479537099185) 3 -65.0106513089 4225.62941566 (15.353965082180355, 3)
loss 410.4815979003906
Current State,action,reward,Response time,Next State:  (3, 15.353965082180355) 1 -230.62030723 2920.10651309 (15.836943704090487, 2)
loss 397.9369201660156
Current State,action,reward,Response time,Next State:  (2, 15.836943704090487) 2 -250.210159225 4586.2030723 (16.466876895473597, 2)
loss 472.0285949707031
Current State,action,reward,Response time,Next State:  (2, 16.466876895473597) 3 -94.8582850434 4782.10159225 (16.871606159345866, 3)
loss 394.6285095214844
Action -2 not possible so Scaled down by 1
Current State,action,reward,Response time,Next State:  (3, 16.871606159345866) 0 -283.425965015 3218.58285043 (17.534967586021782, 2)
loss 1436.2274169921875
Current State,action,reward,Response time,Next State:  (2, 17.534967586021782) 4 6.76217022756 5114.25965015 (17.669285735563751, 4)
loss 1584.969482421875
Current State,action,reward,Response time,Next State:  (4, 17.669285735563751) 3 25.0 2192.37829772 (17.944480812078613, 5)
loss 401.0326232910156
Current State,action,reward,Response time,Next State:  (5, 17.944480812078613) 3 24.0 1906.34355171 (18.385807405229915, 6)
loss 487.7305603027344
Current State,action,reward,Response time,Next State:  (6, 18.385807405229915) 3 23.0 1613.41973487 (18.671267839956315, 7)
loss 473.3092956542969
Current State,action,reward,Response time,Next State:  (7, 18.671267839956315) 0 24.4064010816 1539.36152541 (19.02839494033929, 5)
loss 1103.6744384765625
Current State,action,reward,Response time,Next State:  (5, 19.02839494033929) 4 23.0 2005.93598918 (19.286321916040979, 7)
loss 1236.6937255859375
Current State,action,reward,Response time,Next State:  (7, 19.286321916040979) 1 24.0 1577.68968482 (19.340464848017284, 6)
loss 428.3427429199219
Current State,action,reward,Response time,Next State:  (6, 19.340464848017284) 0 -10.7804098492 1676.55992467 (19.213467265587269, 4)
loss 401.0440673828125
Current State,action,reward,Response time,Next State:  (4, 19.213467265587269) 1 -139.486126476 2367.80409849 (19.140765783401285, 3)
loss 397.3990173339844
Current State,action,reward,Response time,Next State:  (3, 19.140765783401285) 3 -12.7363228767 3664.86126476 (19.385636054792762, 4)
loss 510.9574890136719
Current State,action,reward,Response time,Next State:  (4, 19.385636054792762) 1 -141.12250435 2387.36322877 (19.223969507401588, 3)
loss 1823.9716796875
Current State,action,reward,Response time,Next State:  (3, 19.223969507401588) 2 -141.750731563 3681.2250435 (19.25591252280865, 3)
loss 621.67333984375
Current State,action,reward,Response time,Next State:  (3, 19.25591252280865) 3 -9.30510611419 3687.50731563 (19.08360399753829, 4)
loss 402.36865234375
Current State,action,reward,Response time,Next State:  (4, 19.08360399753829) 3 25.0 2353.05106114 (18.668181536495972, 5)
loss 406.99468994140625
Current State,action,reward,Response time,Next State:  (5, 18.668181536495972) 1 -1.26522113827 1972.8387816 (18.375894992990247, 4)
loss 568.1222534179688
Current State,action,reward,Response time,Next State:  (4, 18.375894992990247) 3 25.0 2272.65221138 (17.82724819986867, 5)
loss 1181.013427734375
Current State,action,reward,Response time,Next State:  (5, 17.82724819986867) 3 24.0 1895.57195892 (17.229782241685768, 6)
loss 515.5396728515625
Current State,action,reward,Response time,Next State:  (6, 17.229782241685768) 4 22.0 1536.96126404 (16.84211602880065, 8)
loss 582.0131225585938
Current State,action,reward,Response time,Next State:  (8, 16.84211602880065) 3 21.0 1355.37749867 (16.237094554670044, 9)
loss 588.9898071289062
Current State,action,reward,Response time,Next State:  (9, 16.237094554670044) 4 19.0 1273.04930988 (15.950694610794756, 11)
loss 898.6121826171875
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 0 21.0 1210.97093797 (15.828704162850809, 9)
loss 649.7494506835938
Current State,action,reward,Response time,Next State:  (9, 15.828704162850809) 3 20.0 1251.67208827 (15.550833128512703, 10)
loss 406.08880615234375
Action +2 not possible so Scaled up by 1
Current State,action,reward,Response time,Next State:  (10, 15.550833128512703) 4 19.0 1202.54023315 (15.446694946204717, 11)
loss 812.134033203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 1217.6226806640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 996.2660522460938
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 601.3473510742188
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 610.8328857421875
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 4 19.0 1207.88915169 (15.954793861767499, 11)
loss 1077.14111328125
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 2 19.0 1211.18755114 (16.004586266677634, 11)
loss 1349.2850341796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 1077.32958984375
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 2 19.0 1214.51137704 (15.947547279389703, 11)
loss 552.1919555664062
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 4 19.0 1210.80462626 (16.11465619633363, 11)
loss 826.9843139648438
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 4 19.0 1219.63501821 (16.147078378791146, 11)
loss 820.8436279296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 1311.490966796875
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 2 19.0 1225.69057988 (16.295120821876548, 11)
loss 516.1043701171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 1200.072021484375
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 4 19.0 1248.87152463 (16.836383524612351, 11)
loss 1068.00634765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 1177.2579345703125
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 2 19.0 1258.27117243 (17.052961248403161, 11)
loss 403.8066711425781
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 2 19.0 1269.21706044 (17.215992726625572, 11)
loss 774.753173828125
############ Running episode number: 60  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 0 22.0 1029.06659875 (11.973514343585284, 8)
loss 476.76739501953125
Current State,action,reward,Response time,Next State:  (8, 11.973514343585284) 3 21.0 1070.83307124 (11.786394321941378, 9)
loss 1235.6732177734375
Current State,action,reward,Response time,Next State:  (9, 11.786394321941378) 3 20.0 1040.0771169 (11.61852219546234, 10)
loss 589.1346435546875
Action +2 not possible so Scaled up by 1
Current State,action,reward,Response time,Next State:  (10, 11.61852219546234) 4 19.0 993.95437024 (11.469111876584304, 11)
loss 890.7627563476562
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 1037.5023193359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 518.9285888671875
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 4 19.0 962.898956888 (11.027107764209074, 11)
loss 622.6022338867188
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 506.0446472167969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 1151.52197265625
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 0 21.0 945.729803224 (10.816918347608043, 9)
loss 714.1720581054688
Current State,action,reward,Response time,Next State:  (9, 10.816918347608043) 3 20.0 989.329834005 (10.819208572963639, 10)
loss 1058.018310546875
Current State,action,reward,Response time,Next State:  (10, 10.819208572963639) 3 19.0 951.555504911 (10.768325938188134, 11)
loss 438.4754638671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 401.6766662597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 625.6107177734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 964.1846313476562
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 1482.4444580078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 956.7676391601562
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 595.5459594726562
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 520.3961791992188
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 426.18939208984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 984.7481079101562
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 1203.3740234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 489.03302001953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 402.4546203613281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 394.54364013671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 394.79827880859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 402.4073486328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 924.407470703125
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 1 20.0 911.223233653 (10.268274366284802, 10)
loss 617.1029052734375
Current State,action,reward,Response time,Next State:  (10, 10.268274366284802) 3 19.0 922.331700166 (10.335411397720526, 11)
loss 468.0879211425781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 611.6023559570312
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 395.20660400390625
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 2 19.0 909.642131904 (10.276491935146446, 11)
loss 405.7315368652344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 962.8168334960938
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 1636.401611328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 1085.5848388671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 523.8701171875
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 2 19.0 913.272125304 (10.333617326102203, 11)
loss 1227.75927734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 402.5128479003906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 565.0930786132812
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 405.8871154785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 1130.986328125
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 2 19.0 931.154858096 (10.624473674922116, 11)
loss 713.0842895507812
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 4 19.0 929.522052234 (10.771376986314287, 11)
loss 490.218017578125
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 2 19.0 937.284736847 (10.924797168745895, 11)
loss 509.3892822265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 1246.9927978515625
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 0 21.0 951.466016946 (11.271571944085663, 9)
loss 583.3748779296875
Current State,action,reward,Response time,Next State:  (9, 11.271571944085663) 3 20.0 1013.12870607 (11.670334358779868, 10)
loss 926.3403930664062
Current State,action,reward,Response time,Next State:  (10, 11.670334358779868) 1 21.0 996.702699398 (11.819721938468785, 9)
loss 506.7368469238281
Current State,action,reward,Response time,Next State:  (9, 11.819721938468785) 3 20.0 1041.82165315 (12.19918626616789, 10)
loss 550.842529296875
Current State,action,reward,Response time,Next State:  (10, 12.19918626616789) 3 19.0 1024.75516863 (12.501496275411796, 11)
loss 396.946533203125
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 0 21.0 1028.70793389 (13.168618569876575, 9)
loss 440.53363037109375
Current State,action,reward,Response time,Next State:  (9, 13.168618569876575) 4 19.0 1112.429735 (13.649658108197247, 11)
loss 405.33929443359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 1016.1882934570312
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 1 20.0 1122.88439768 (14.677479537099185, 10)
loss 1289.1478271484375
Action +2 not possible so Scaled up by 1
Current State,action,reward,Response time,Next State:  (10, 14.677479537099185) 4 19.0 1156.21398489 (15.353965082180355, 11)
loss 508.721923828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 629.7557373046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 406.1824645996094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 1008.8518676757812
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 2 19.0 1259.63387034 (17.534967586021782, 11)
loss 398.0422668457031
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 0 21.0 1294.6873044 (17.669285735563751, 9)
loss 1282.4228515625
Current State,action,reward,Response time,Next State:  (9, 17.669285735563751) 1 22.0 1348.01745033 (17.944480812078613, 8)
loss 428.8836975097656
Current State,action,reward,Response time,Next State:  (8, 17.944480812078613) 3 21.0 1419.80498244 (18.385807405229915, 9)
loss 520.5733642578125
Current State,action,reward,Response time,Next State:  (9, 18.385807405229915) 3 20.0 1385.5238237 (18.671267839956315, 10)
loss 400.50848388671875
Current State,action,reward,Response time,Next State:  (10, 18.671267839956315) 2 20.0 1368.06085906 (19.02839494033929, 10)
loss 1157.11962890625
Current State,action,reward,Response time,Next State:  (10, 19.02839494033929) 0 22.0 1387.00434183 (19.286321916040979, 8)
loss 1390.6370849609375
Current State,action,reward,Response time,Next State:  (8, 19.286321916040979) 3 21.0 1498.22861069 (19.340464848017284, 9)
loss 399.0038757324219
Current State,action,reward,Response time,Next State:  (9, 19.340464848017284) 3 20.0 1435.4954296 (19.213467265587269, 10)
loss 402.54437255859375
Current State,action,reward,Response time,Next State:  (10, 19.213467265587269) 3 19.0 1396.82133527 (19.140765783401285, 11)
loss 1785.1474609375
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 1 20.0 1379.54110953 (19.385636054792762, 10)
loss 582.6593017578125
Current State,action,reward,Response time,Next State:  (10, 19.385636054792762) 3 19.0 1405.95387237 (19.223969507401588, 11)
loss 394.4500732421875
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 2 19.0 1383.93777195 (19.25591252280865, 11)
loss 510.96734619140625
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 2 19.0 1385.62570908 (19.08360399753829, 11)
loss 424.24005126953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 1937.19384765625
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 0 21.0 1354.56874896 (18.375894992990247, 9)
loss 401.85882568359375
Current State,action,reward,Response time,Next State:  (9, 18.375894992990247) 4 19.0 1385.00495784 (17.82724819986867, 11)
loss 637.290283203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 515.5616455078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 1007.0274047851562
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 424.6253967285156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 891.7793579101562
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 4 19.0 1210.97093797 (15.828704162850809, 11)
loss 421.7626647949219
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 2 19.0 1204.52470225 (15.550833128512703, 11)
loss 1025.590576171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 604.103759765625
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 4 19.0 1184.33851965 (15.750501603468638, 11)
loss 522.504638671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 387.0194091796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 405.8772888183594
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 2 19.0 1204.59090422 (15.892373986997768, 11)
loss 1222.20556640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 479.9833679199219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 1596.470458984375
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 2 19.0 1213.81868812 (16.017694914042416, 11)
loss 1237.046630859375
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 0 21.0 1214.51137704 (15.947547279389703, 9)
loss 1593.5443115234375
Current State,action,reward,Response time,Next State:  (9, 15.947547279389703) 3 20.0 1257.89293893 (16.11465619633363, 10)
loss 433.8144836425781
Action +2 not possible so Scaled up by 1
Current State,action,reward,Response time,Next State:  (10, 16.11465619633363) 4 19.0 1232.44771583 (16.147078378791146, 11)
loss 428.8435974121094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 434.8769836425781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 628.0778198242188
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 409.6581726074219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 467.6092224121094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 428.2264709472656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 1755.1429443359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 404.6485290527344
############ Running episode number: 61  ##############
Action +2 not possible so Scaled up by 1
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 4 19.0 1029.06659875 (11.973514343585284, 11)
loss 585.1083984375
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 2 19.0 1000.80824137 (11.786394321941378, 11)
loss 1105.7359619140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 1134.958251953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 627.9283447265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 628.861328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 1212.582763671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 1563.3399658203125
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 0 21.0 950.798097136 (10.995673623987257, 9)
loss 413.8192138671875
Current State,action,reward,Response time,Next State:  (9, 10.995673623987257) 3 20.0 998.686790566 (10.931193889570471, 10)
loss 788.4163208007812
Current State,action,reward,Response time,Next State:  (10, 10.931193889570471) 1 21.0 957.495664348 (10.816918347608043, 9)
loss 1036.3480224609375
Current State,action,reward,Response time,Next State:  (9, 10.816918347608043) 3 20.0 989.329834005 (10.819208572963639, 10)
loss 1698.157470703125
Current State,action,reward,Response time,Next State:  (10, 10.819208572963639) 1 21.0 951.555504911 (10.768325938188134, 9)
loss 519.3579711914062
Current State,action,reward,Response time,Next State:  (9, 10.768325938188134) 4 19.0 986.786261176 (10.772009508959538, 11)
loss 440.1932067871094
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 4 19.0 937.318160694 (10.644925616761762, 11)
loss 389.73052978515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 426.42608642578125
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 1 20.0 927.560809977 (10.552868829802469, 10)
loss 679.0722045898438
Current State,action,reward,Response time,Next State:  (10, 10.552868829802469) 0 22.0 937.427755072 (10.553846649940214, 8)
loss 526.4598999023438
Current State,action,reward,Response time,Next State:  (8, 10.553846649940214) 3 21.0 987.860883917 (10.489125480251131, 9)
loss 1230.376953125
Current State,action,reward,Response time,Next State:  (9, 10.489125480251131) 0 23.0 972.171495057 (10.448897752470936, 7)
loss 1403.042236328125
Current State,action,reward,Response time,Next State:  (7, 10.448897752470936) 3 22.0 1026.97030049 (10.433149880183072, 8)
loss 395.3804931640625
Current State,action,reward,Response time,Next State:  (8, 10.433149880183072) 3 21.0 980.806785952 (10.44185150623065, 9)
loss 1035.9241943359375
Current State,action,reward,Response time,Next State:  (9, 10.44185150623065) 2 21.0 969.696935814 (10.370942817486826, 9)
loss 517.1102905273438
Current State,action,reward,Response time,Next State:  (9, 10.370942817486826) 3 20.0 965.985215893 (10.42733414151318, 10)
loss 591.7760009765625
Current State,action,reward,Response time,Next State:  (10, 10.42733414151318) 3 19.0 930.768881517 (10.388469398680568, 11)
loss 438.0685729980469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 592.460693359375
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 4 19.0 914.701547126 (10.319026962956018, 11)
loss 510.6330261230469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 381.55877685546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 387.73175048828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 1127.564453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 396.43963623046875
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 4 19.0 914.247384359 (10.305649118067803, 11)
loss 506.12432861328125
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 0 21.0 912.67468196 (10.24826025489064, 9)
loss 1212.821533203125
Current State,action,reward,Response time,Next State:  (9, 10.24826025489064) 1 22.0 959.563389179 (10.276491935146446, 8)
loss 402.2821350097656
Current State,action,reward,Response time,Next State:  (8, 10.276491935146446) 4 20.0 971.650944469 (10.236991269871366, 10)
loss 395.5316467285156
Current State,action,reward,Response time,Next State:  (10, 10.236991269871366) 3 19.0 920.672316722 (10.236272697871373, 11)
loss 389.6544494628906
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 0 21.0 909.008683798 (10.369891240151098, 9)
loss 386.5939636230469
Current State,action,reward,Response time,Next State:  (9, 10.369891240151098) 3 20.0 965.930171009 (10.316955310454549, 10)
loss 403.33197021484375
Current State,action,reward,Response time,Next State:  (10, 10.316955310454549) 1 21.0 924.913936648 (10.333617326102203, 9)
loss 398.8588562011719
Current State,action,reward,Response time,Next State:  (9, 10.333617326102203) 2 21.0 964.03141062 (10.390165524255663, 9)
loss 408.7138977050781
Current State,action,reward,Response time,Next State:  (9, 10.390165524255663) 4 19.0 966.991429728 (10.425974763084863, 11)
loss 920.8167114257812
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 387.8886413574219
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 0 21.0 925.376677007 (10.655373370049301, 9)
loss 392.55401611328125
Current State,action,reward,Response time,Next State:  (9, 10.655373370049301) 3 20.0 980.873751654 (10.624473674922116, 10)
loss 391.130615234375
Action +2 not possible so Scaled up by 1
Current State,action,reward,Response time,Next State:  (10, 10.624473674922116) 4 19.0 941.225969064 (10.771376986314287, 11)
loss 412.1587219238281
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 1 20.0 937.284736847 (10.924797168745895, 10)
loss 402.6263122558594
Current State,action,reward,Response time,Next State:  (10, 10.924797168745895) 3 19.0 957.1563561 (11.039747673816453, 11)
loss 627.9338989257812
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 2 19.0 951.466016946 (11.271571944085663, 11)
loss 435.4307861328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 492.34710693359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 392.09783935546875
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 0 21.0 992.681522335 (12.19918626616789, 9)
loss 383.4869689941406
Current State,action,reward,Response time,Next State:  (9, 12.19918626616789) 3 20.0 1061.68473805 (12.501496275411796, 10)
loss 1010.9343872070312
Current State,action,reward,Response time,Next State:  (10, 12.501496275411796) 2 20.0 1040.79092857 (13.168618569876575, 10)
loss 1342.8885498046875
Current State,action,reward,Response time,Next State:  (10, 13.168618569876575) 3 19.0 1076.17782493 (13.649658108197247, 11)
loss 765.100830078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 1083.415771484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 597.9268188476562
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 1783.8729248046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 923.7250366210938
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 481.9226989746094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 592.7147216796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 1211.4921875
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 4 19.0 1294.6873044 (17.669285735563751, 11)
loss 581.19873046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 918.6995849609375
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 4 19.0 1316.32685758 (18.385807405229915, 11)
loss 588.0597534179688
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 4 19.0 1339.64749699 (18.671267839956315, 11)
loss 399.2318115234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 578.29345703125
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 0 21.0 1373.60319427 (19.286321916040979, 9)
loss 606.5506591796875
Current State,action,reward,Response time,Next State:  (9, 19.286321916040979) 3 20.0 1432.66131431 (19.340464848017284, 10)
loss 583.4920654296875
Current State,action,reward,Response time,Next State:  (10, 19.340464848017284) 3 19.0 1403.55780672 (19.213467265587269, 11)
loss 592.7940673828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 393.07659912109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 433.8144836425781
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 2 19.0 1392.48057747 (19.223969507401588, 11)
loss 394.75360107421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 387.4939270019531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 771.9777221679688
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 4 19.0 1376.52055872 (18.668181536495972, 11)
loss 581.9752197265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 588.5994873046875
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 2 19.0 1339.12370397 (17.82724819986867, 11)
loss 1931.9677734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 396.5484924316406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 1035.8897705078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 581.9014892578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 392.3992919921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 518.647216796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 401.61181640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 389.01446533203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 1187.898193359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 394.351318359375
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 2 19.0 1203.91462651 (15.829956988360925, 11)
loss 399.1146545410156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 386.45440673828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 385.42535400390625
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 1 20.0 1211.18755114 (16.004586266677634, 10)
loss 812.8814697265625
Current State,action,reward,Response time,Next State:  (10, 16.004586266677634) 3 19.0 1226.60915635 (16.017694914042416, 11)
loss 393.8078308105469
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 2 19.0 1214.51137704 (15.947547279389703, 11)
loss 394.7823791503906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 1215.42822265625
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 1 20.0 1219.63501821 (16.147078378791146, 10)
loss 711.0274658203125
Current State,action,reward,Response time,Next State:  (10, 16.147078378791146) 3 19.0 1234.16752106 (16.229253414601111, 11)
loss 1018.6939697265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 1020.4099731445312
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 0 21.0 1229.17115431 (16.667936385136993, 9)
loss 1100.3819580078125
Current State,action,reward,Response time,Next State:  (9, 16.667936385136993) 3 20.0 1295.6017535 (16.836383524612351, 10)
loss 505.6436462402344
Current State,action,reward,Response time,Next State:  (10, 16.836383524612351) 1 21.0 1270.73108663 (16.845818065953559, 9)
loss 1189.5628662109375
Current State,action,reward,Response time,Next State:  (9, 16.845818065953559) 2 21.0 1304.91298164 (17.052961248403161, 9)
loss 693.7454833984375
Current State,action,reward,Response time,Next State:  (9, 17.052961248403161) 3 20.0 1315.75590499 (17.215992726625572, 10)
loss 385.4844055175781
############ Running episode number: 62  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 589.4344482421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 915.4634399414062
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 2 19.0 990.920419923 (11.61852219546234, 11)
loss 595.8075561523438
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 4 19.0 982.049698353 (11.469111876584304, 11)
loss 423.83087158203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 398.1993103027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 391.53472900390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 511.07684326171875
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 0 21.0 950.798097136 (10.995673623987257, 9)
loss 382.288330078125
Current State,action,reward,Response time,Next State:  (9, 10.995673623987257) 3 20.0 998.686790566 (10.931193889570471, 10)
loss 851.339599609375
Current State,action,reward,Response time,Next State:  (10, 10.931193889570471) 2 20.0 957.495664348 (10.816918347608043, 10)
loss 404.4122619628906
Current State,action,reward,Response time,Next State:  (10, 10.816918347608043) 2 20.0 951.434021987 (10.819208572963639, 10)
loss 1400.034912109375
Current State,action,reward,Response time,Next State:  (10, 10.819208572963639) 0 22.0 951.555504911 (10.768325938188134, 8)
loss 502.2972106933594
Current State,action,reward,Response time,Next State:  (8, 10.768325938188134) 1 23.0 1000.39608195 (10.772009508959538, 7)
loss 861.3744506835938
Current State,action,reward,Response time,Next State:  (7, 10.772009508959538) 3 22.0 1047.1055689 (10.644925616761762, 8)
loss 586.4700317382812
Current State,action,reward,Response time,Next State:  (8, 10.644925616761762) 2 22.0 993.183975462 (10.58735855349979, 8)
loss 966.4979248046875
Current State,action,reward,Response time,Next State:  (8, 10.58735855349979) 3 21.0 989.819480251 (10.552868829802469, 9)
loss 390.3169250488281
Current State,action,reward,Response time,Next State:  (9, 10.552868829802469) 0 23.0 975.508144832 (10.553846649940214, 7)
loss 402.7510681152344
Current State,action,reward,Response time,Next State:  (7, 10.553846649940214) 1 24.0 1033.5103727 (10.489125480251131, 6)
loss 398.4268798828125
Current State,action,reward,Response time,Next State:  (6, 10.489125480251131) 0 26.0 1091.14025199 (10.448897752470936, 4)
loss 590.3944091796875
Current State,action,reward,Response time,Next State:  (4, 10.448897752470936) 3 25.0 1372.11052534 (10.433149880183072, 5)
loss 424.1916809082031
Current State,action,reward,Response time,Next State:  (5, 10.433149880183072) 0 27.0 1216.18578302 (10.44185150623065, 3)
loss 435.476806640625
Current State,action,reward,Response time,Next State:  (3, 10.44185150623065) 3 26.0 1954.03509421 (10.370942817486826, 4)
loss 777.9098510742188
Current State,action,reward,Response time,Next State:  (4, 10.370942817486826) 3 25.0 1363.25450251 (10.42733414151318, 5)
loss 396.8193664550781
Current State,action,reward,Response time,Next State:  (5, 10.42733414151318) 3 24.0 1215.65142003 (10.388469398680568, 6)
loss 389.9659118652344
Current State,action,reward,Response time,Next State:  (6, 10.388469398680568) 3 23.0 1084.48294874 (10.344006106602812, 7)
loss 1021.7760620117188
Current State,action,reward,Response time,Next State:  (7, 10.344006106602812) 4 21.0 1020.43379601 (10.319026962956018, 9)
loss 1203.7301025390625
Current State,action,reward,Response time,Next State:  (9, 10.319026962956018) 3 20.0 963.267677113 (10.30224719189987, 10)
loss 924.646484375
Current State,action,reward,Response time,Next State:  (10, 10.30224719189987) 1 21.0 924.133757854 (10.278181486298042, 9)
loss 976.6708984375
Current State,action,reward,Response time,Next State:  (9, 10.278181486298042) 2 21.0 961.129617982 (10.268274366284802, 9)
loss 1199.4940185546875
Current State,action,reward,Response time,Next State:  (9, 10.268274366284802) 0 23.0 960.611029141 (10.335411397720526, 7)
loss 1205.7333984375
Current State,action,reward,Response time,Next State:  (7, 10.335411397720526) 3 22.0 1019.89820185 (10.305649118067803, 8)
loss 381.8199157714844
Current State,action,reward,Response time,Next State:  (8, 10.305649118067803) 0 24.0 973.355030047 (10.24826025489064, 6)
loss 392.4495544433594
Current State,action,reward,Response time,Next State:  (6, 10.24826025489064) 3 23.0 1075.20964131 (10.276491935146446, 7)
loss 389.46685791015625
Current State,action,reward,Response time,Next State:  (7, 10.276491935146446) 0 25.0 1016.22653355 (10.236991269871366, 5)
loss 599.2733154296875
Current State,action,reward,Response time,Next State:  (5, 10.236991269871366) 0 27.0 1198.16229423 (10.236272697871373, 3)
loss 461.56732177734375
Current State,action,reward,Response time,Next State:  (3, 10.236272697871373) 3 26.0 1913.60365711 (10.369891240151098, 4)
loss 396.8113708496094
Current State,action,reward,Response time,Next State:  (4, 10.369891240151098) 3 25.0 1363.1350387 (10.316955310454549, 5)
loss 489.2057189941406
Current State,action,reward,Response time,Next State:  (5, 10.316955310454549) 3 24.0 1205.50956788 (10.333617326102203, 6)
loss 1739.9373779296875
Current State,action,reward,Response time,Next State:  (6, 10.333617326102203) 0 26.0 1080.85508169 (10.390165524255663, 4)
loss 1198.789306640625
Current State,action,reward,Response time,Next State:  (4, 10.390165524255663) 3 25.0 1365.43828638 (10.425974763084863, 5)
loss 1048.776611328125
Current State,action,reward,Response time,Next State:  (5, 10.425974763084863) 0 27.0 1215.52651732 (10.546025383098053, 3)
loss 584.0345458984375
Current State,action,reward,Response time,Next State:  (3, 10.546025383098053) 3 26.0 1974.52309871 (10.655373370049301, 4)
loss 1811.72314453125
Current State,action,reward,Response time,Next State:  (4, 10.655373370049301) 3 25.0 1395.56706193 (10.624473674922116, 5)
loss 600.3540649414062
Current State,action,reward,Response time,Next State:  (5, 10.624473674922116) 0 25.1156724308 1233.76503821 (10.771376986314287, 3)
loss 484.8785705566406
Current State,action,reward,Response time,Next State:  (3, 10.771376986314287) 3 26.0 2018.84327569 (10.924797168745895, 4)
loss 1574.0555419921875
Current State,action,reward,Response time,Next State:  (4, 10.924797168745895) 3 25.0 1426.17478823 (11.039747673816453, 5)
loss 637.6978759765625
Current State,action,reward,Response time,Next State:  (5, 11.039747673816453) 4 23.0 1271.92133555 (11.271571944085663, 7)
loss 1547.454345703125
Current State,action,reward,Response time,Next State:  (7, 11.271571944085663) 1 24.0 1078.23666679 (11.670334358779868, 6)
loss 597.442626953125
Current State,action,reward,Response time,Next State:  (6, 11.670334358779868) 3 23.0 1169.26435121 (11.819721938468785, 7)
loss 436.0535583496094
Current State,action,reward,Response time,Next State:  (7, 11.819721938468785) 4 21.0 1112.39558253 (12.19918626616789, 9)
loss 1020.5610961914062
Current State,action,reward,Response time,Next State:  (9, 12.19918626616789) 3 20.0 1061.68473805 (12.501496275411796, 10)
loss 598.5481567382812
Current State,action,reward,Response time,Next State:  (10, 12.501496275411796) 3 19.0 1040.79092857 (13.168618569876575, 11)
loss 1032.734619140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 403.04559326171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 770.5420532226562
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 1 20.0 1122.88439768 (14.677479537099185, 10)
loss 444.3546142578125
Current State,action,reward,Response time,Next State:  (10, 14.677479537099185) 3 19.0 1156.21398489 (15.353965082180355, 11)
loss 398.3725891113281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 1127.9581298828125
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 1 20.0 1204.9600972 (16.466876895473597, 10)
loss 1400.2027587890625
Current State,action,reward,Response time,Next State:  (10, 16.466876895473597) 0 22.0 1251.130943 (16.871606159345866, 8)
loss 588.7388305664062
Current State,action,reward,Response time,Next State:  (8, 16.871606159345866) 3 21.0 1357.1010433 (17.534967586021782, 9)
loss 475.20477294921875
Current State,action,reward,Response time,Next State:  (9, 17.534967586021782) 3 20.0 1340.98655806 (17.669285735563751, 10)
loss 395.59619140625
Action +2 not possible so Scaled up by 1
Current State,action,reward,Response time,Next State:  (10, 17.669285735563751) 4 19.0 1314.91162813 (17.944480812078613, 11)
loss 395.50555419921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 2140.911865234375
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 2 19.0 1339.64749699 (18.671267839956315, 11)
loss 1018.8275756835938
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 588.1101684570312
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 4 19.0 1373.60319427 (19.286321916040979, 11)
loss 967.184326171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 860.6007690429688
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 394.1605529785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 425.9338073730469
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 2 19.0 1379.54110953 (19.385636054792762, 11)
loss 473.6741027832031
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 4 19.0 1392.48057747 (19.223969507401588, 11)
loss 601.033203125
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 1 20.0 1383.93777195 (19.25591252280865, 10)
loss 783.20654296875
Current State,action,reward,Response time,Next State:  (10, 19.25591252280865) 3 19.0 1399.0728054 (19.08360399753829, 11)
loss 484.07275390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 439.4048156738281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 398.2684326171875
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 1 20.0 1339.12370397 (17.82724819986867, 10)
loss 396.6239318847656
Current State,action,reward,Response time,Next State:  (10, 17.82724819986867) 1 21.0 1323.29060362 (17.229782241685768, 9)
loss 1038.9024658203125
Current State,action,reward,Response time,Next State:  (9, 17.229782241685768) 2 21.0 1325.01161138 (16.84211602880065, 9)
loss 2020.9349365234375
Current State,action,reward,Response time,Next State:  (9, 16.84211602880065) 0 23.0 1304.71919827 (16.237094554670044, 7)
loss 426.20147705078125
Current State,action,reward,Response time,Next State:  (7, 16.237094554670044) 3 22.0 1387.67180358 (15.950694610794756, 8)
loss 410.124267578125
Current State,action,reward,Response time,Next State:  (8, 15.950694610794756) 3 21.0 1303.27855664 (15.828704162850809, 9)
loss 1822.594482421875
Current State,action,reward,Response time,Next State:  (9, 15.828704162850809) 3 20.0 1251.67208827 (15.550833128512703, 10)
loss 1045.1058349609375
Current State,action,reward,Response time,Next State:  (10, 15.550833128512703) 0 22.0 1202.54023315 (15.446694946204717, 8)
loss 717.1524658203125
Current State,action,reward,Response time,Next State:  (8, 15.446694946204717) 3 21.0 1273.82239956 (15.750501603468638, 9)
loss 393.06658935546875
Current State,action,reward,Response time,Next State:  (9, 15.750501603468638) 2 21.0 1247.57857022 (15.817158911312735, 9)
loss 405.1048278808594
Current State,action,reward,Response time,Next State:  (9, 15.817158911312735) 3 20.0 1251.06775133 (15.829956988360925, 10)
loss 594.2091674804688
Current State,action,reward,Response time,Next State:  (10, 15.829956988360925) 3 19.0 1217.34610485 (15.892373986997768, 11)
loss 390.5784606933594
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 1 20.0 1207.88915169 (15.954793861767499, 10)
loss 595.3532104492188
Current State,action,reward,Response time,Next State:  (10, 15.954793861767499) 1 21.0 1223.96796344 (16.004586266677634, 9)
loss 913.6312866210938
Current State,action,reward,Response time,Next State:  (9, 16.004586266677634) 3 20.0 1260.87864843 (16.017694914042416, 10)
loss 1014.65087890625
Action +2 not possible so Scaled up by 1
Current State,action,reward,Response time,Next State:  (10, 16.017694914042416) 4 19.0 1227.30449265 (15.947547279389703, 11)
loss 405.1457824707031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 399.9822692871094
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 2 19.0 1219.63501821 (16.147078378791146, 11)
loss 599.9972534179688
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 1307.6947021484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 632.4489135742188
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 1 20.0 1229.17115431 (16.667936385136993, 10)
loss 639.6876220703125
Current State,action,reward,Response time,Next State:  (10, 16.667936385136993) 3 19.0 1261.79596106 (16.836383524612351, 11)
loss 598.506103515625
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 2 19.0 1257.77263112 (16.845818065953559, 11)
loss 387.61273193359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 506.66162109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 485.8273010253906
############ Running episode number: 63  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 1029.732177734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 513.946044921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 395.258544921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 924.0894775390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 383.3682861328125
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 0 21.0 967.160346038 (11.25610796929319, 9)
loss 1210.3101806640625
Current State,action,reward,Response time,Next State:  (9, 11.25610796929319) 3 20.0 1012.3192433 (11.027107764209074, 10)
loss 393.4024658203125
Current State,action,reward,Response time,Next State:  (10, 11.027107764209074) 3 19.0 962.583328739 (10.995673623987257, 11)
loss 407.045654296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 984.6825561523438
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 0 21.0 945.729803224 (10.816918347608043, 9)
loss 586.326416015625
Current State,action,reward,Response time,Next State:  (9, 10.816918347608043) 2 21.0 989.329834005 (10.819208572963639, 9)
loss 1401.9669189453125
Current State,action,reward,Response time,Next State:  (9, 10.819208572963639) 1 22.0 989.449716 (10.768325938188134, 8)
loss 1435.5230712890625
Current State,action,reward,Response time,Next State:  (8, 10.768325938188134) 3 21.0 1000.39608195 (10.772009508959538, 9)
loss 546.4566650390625
Current State,action,reward,Response time,Next State:  (9, 10.772009508959538) 4 19.0 986.979077927 (10.644925616761762, 11)
loss 398.8346252441406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 394.0230712890625
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 2 19.0 927.560809977 (10.552868829802469, 11)
loss 924.6647338867188
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 393.08154296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 937.1069946289062
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 4 19.0 922.369964659 (10.448897752470936, 11)
loss 385.1565856933594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 394.7253723144531
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 0 21.0 919.412094444 (10.44185150623065, 9)
loss 510.9632568359375
Current State,action,reward,Response time,Next State:  (9, 10.44185150623065) 2 21.0 969.696935814 (10.370942817486826, 9)
loss 792.7078857421875
Current State,action,reward,Response time,Next State:  (9, 10.370942817486826) 2 21.0 965.985215893 (10.42733414151318, 9)
loss 390.5492858886719
Current State,action,reward,Response time,Next State:  (9, 10.42733414151318) 1 22.0 968.937023414 (10.388469398680568, 8)
loss 814.709716796875
Current State,action,reward,Response time,Next State:  (8, 10.388469398680568) 1 23.0 978.19544437 (10.344006106602812, 7)
loss 1103.465087890625
Current State,action,reward,Response time,Next State:  (7, 10.344006106602812) 3 22.0 1020.43379601 (10.319026962956018, 8)
loss 391.2909240722656
Current State,action,reward,Response time,Next State:  (8, 10.319026962956018) 2 22.0 974.136895449 (10.30224719189987, 8)
loss 695.2186889648438
Current State,action,reward,Response time,Next State:  (8, 10.30224719189987) 2 22.0 973.15620517 (10.278181486298042, 8)
loss 770.513427734375
Current State,action,reward,Response time,Next State:  (8, 10.278181486298042) 3 21.0 971.749689939 (10.268274366284802, 9)
loss 392.6319580078125
Current State,action,reward,Response time,Next State:  (9, 10.268274366284802) 3 20.0 960.611029141 (10.335411397720526, 10)
loss 395.6434326171875
Action +2 not possible so Scaled up by 1
Current State,action,reward,Response time,Next State:  (10, 10.335411397720526) 4 19.0 925.892923039 (10.305649118067803, 11)
loss 603.3082885742188
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 391.0959167480469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 591.7039794921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 390.8517150878906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 492.4743957519531
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 2 19.0 909.008683798 (10.369891240151098, 11)
loss 583.2387084960938
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 2 19.0 916.069372847 (10.316955310454549, 11)
loss 387.1768493652344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 1025.60400390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 778.2274780273438
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 1 20.0 917.140709305 (10.425974763084863, 10)
loss 774.210205078125
Current State,action,reward,Response time,Next State:  (10, 10.425974763084863) 3 19.0 930.696774523 (10.546025383098053, 11)
loss 787.3665771484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 675.0782470703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 587.8535766601562
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 2 19.0 929.522052234 (10.771376986314287, 11)
loss 485.06231689453125
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 4 19.0 937.284736847 (10.924797168745895, 11)
loss 1510.640869140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 584.7863159179688
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 2 19.0 951.466016946 (11.271571944085663, 11)
loss 508.0372009277344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 926.790283203125
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 2 19.0 984.787563682 (11.819721938468785, 11)
loss 400.5196838378906
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 1 20.0 992.681522335 (12.19918626616789, 10)
loss 403.8453674316406
Current State,action,reward,Response time,Next State:  (10, 12.19918626616789) 1 21.0 1024.75516863 (12.501496275411796, 9)
loss 443.6796569824219
Current State,action,reward,Response time,Next State:  (9, 12.501496275411796) 0 23.0 1077.50917513 (13.168618569876575, 7)
loss 584.42822265625
Current State,action,reward,Response time,Next State:  (7, 13.168618569876575) 3 22.0 1196.45441106 (13.649658108197247, 8)
loss 396.64715576171875
Current State,action,reward,Response time,Next State:  (8, 13.649658108197247) 1 23.0 1168.79494995 (14.283719188889453, 7)
loss 474.6290588378906
Current State,action,reward,Response time,Next State:  (7, 14.283719188889453) 3 22.0 1265.94383637 (14.677479537099185, 8)
loss 406.1033020019531
Current State,action,reward,Response time,Next State:  (8, 14.677479537099185) 3 21.0 1228.86576266 (15.353965082180355, 9)
loss 1165.477783203125
Current State,action,reward,Response time,Next State:  (9, 15.353965082180355) 3 20.0 1226.82184023 (15.836943704090487, 10)
loss 517.2982177734375
Current State,action,reward,Response time,Next State:  (10, 15.836943704090487) 3 19.0 1217.71670884 (16.466876895473597, 11)
loss 1124.0416259765625
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 4 19.0 1238.24711194 (16.871606159345866, 11)
loss 442.83502197265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 1295.2581787109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 433.12725830078125
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 0 21.0 1301.78496219 (17.944480812078613, 9)
loss 400.4931335449219
Current State,action,reward,Response time,Next State:  (9, 17.944480812078613) 3 20.0 1362.4225545 (18.385807405229915, 10)
loss 1286.150390625
Current State,action,reward,Response time,Next State:  (10, 18.385807405229915) 3 19.0 1352.9188695 (18.671267839956315, 11)
loss 410.05450439453125
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 2 19.0 1354.73183582 (19.02839494033929, 11)
loss 798.5048828125
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 4 19.0 1373.60319427 (19.286321916040979, 11)
loss 407.0539855957031
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 4 19.0 1387.23260634 (19.340464848017284, 11)
loss 1213.770751953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 395.7429504394531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 480.8063049316406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 586.3731079101562
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 4 19.0 1392.48057747 (19.223969507401588, 11)
loss 1196.7052001953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 792.9700317382812
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 924.707275390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 588.4107055664062
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 1200.2774658203125
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 2 19.0 1339.12370397 (17.82724819986867, 11)
loss 401.495849609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 587.1318969726562
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 2 19.0 1278.56065924 (16.84211602880065, 11)
loss 405.7391052246094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 1817.4820556640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 1208.37939453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 780.953369140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 389.5313720703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 926.09716796875
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 1 20.0 1184.33851965 (15.750501603468638, 10)
loss 1117.38623046875
Current State,action,reward,Response time,Next State:  (10, 15.750501603468638) 2 20.0 1213.1314661 (15.817158911312735, 10)
loss 1213.26220703125
Current State,action,reward,Response time,Next State:  (10, 15.817158911312735) 0 22.0 1216.66724247 (15.829956988360925, 8)
loss 621.4305419921875
Current State,action,reward,Response time,Next State:  (8, 15.829956988360925) 3 21.0 1296.22207104 (15.892373986997768, 9)
loss 1201.0535888671875
Current State,action,reward,Response time,Next State:  (9, 15.892373986997768) 0 23.0 1255.00488935 (15.954793861767499, 7)
loss 593.95654296875
Current State,action,reward,Response time,Next State:  (7, 15.954793861767499) 0 25.0 1370.07974724 (16.004586266677634, 5)
loss 396.86163330078125
Current State,action,reward,Response time,Next State:  (5, 16.004586266677634) 4 23.0 1728.10173237 (16.017694914042416, 7)
loss 594.360595703125
Current State,action,reward,Response time,Next State:  (7, 16.017694914042416) 1 24.0 1373.9995352 (15.947547279389703, 6)
loss 984.03662109375
Current State,action,reward,Response time,Next State:  (6, 15.947547279389703) 3 23.0 1452.15538995 (16.11465619633363, 7)
loss 1027.5552978515625
Current State,action,reward,Response time,Next State:  (7, 16.11465619633363) 3 22.0 1380.04184534 (16.147078378791146, 8)
loss 1728.1376953125
Current State,action,reward,Response time,Next State:  (8, 16.147078378791146) 3 21.0 1314.7561657 (16.229253414601111, 9)
loss 434.0564880371094
Current State,action,reward,Response time,Next State:  (9, 16.229253414601111) 3 20.0 1272.63886489 (16.295120821876548, 10)
loss 1201.0338134765625
Action +2 not possible so Scaled up by 1
Current State,action,reward,Response time,Next State:  (10, 16.295120821876548) 4 19.0 1242.02029803 (16.667936385136993, 11)
loss 434.3034973144531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 475.2298583984375
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 2 19.0 1257.77263112 (16.845818065953559, 11)
loss 397.382080078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 1025.387939453125
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 2 19.0 1269.21706044 (17.215992726625572, 11)
loss 1544.8924560546875
############ Running episode number: 64  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 1201.9542236328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 588.2101440429688
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 0 21.0 990.920419923 (11.61852219546234, 9)
loss 439.34967041015625
Current State,action,reward,Response time,Next State:  (9, 11.61852219546234) 3 20.0 1031.28983953 (11.469111876584304, 10)
loss 415.50408935546875
Current State,action,reward,Response time,Next State:  (10, 11.469111876584304) 3 19.0 986.02903554 (11.336751742492702, 11)
loss 1205.6324462890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 386.917724609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 391.685791015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 437.7908935546875
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 1 20.0 949.137050055 (10.931193889570471, 10)
loss 598.6016845703125
Current State,action,reward,Response time,Next State:  (10, 10.931193889570471) 3 19.0 957.495664348 (10.816918347608043, 11)
loss 1396.7755126953125
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 0 21.0 939.691239608 (10.819208572963639, 9)
loss 1035.220947265625
Current State,action,reward,Response time,Next State:  (9, 10.819208572963639) 3 20.0 989.449716 (10.768325938188134, 10)
loss 591.4171752929688
Current State,action,reward,Response time,Next State:  (10, 10.768325938188134) 3 19.0 948.856481751 (10.772009508959538, 11)
loss 781.685791015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 776.15478515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 925.3539428710938
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 4 19.0 927.560809977 (10.552868829802469, 11)
loss 1248.8758544921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 1015.2833251953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 760.8637084960938
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 4 19.0 922.369964659 (10.448897752470936, 11)
loss 407.68145751953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 480.10125732421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 594.9893188476562
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 1020.10693359375
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 4 19.0 916.124940439 (10.42733414151318, 11)
loss 435.8943786621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 1020.8656616210938
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 4 19.0 917.051082408 (10.344006106602812, 11)
loss 634.2031860351562
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 4 19.0 914.701547126 (10.319026962956018, 11)
loss 407.47845458984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 383.2001953125
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 0 21.0 912.494916918 (10.278181486298042, 9)
loss 395.7414855957031
Current State,action,reward,Response time,Next State:  (9, 10.278181486298042) 3 20.0 961.129617982 (10.268274366284802, 10)
loss 1018.2704467773438
Current State,action,reward,Response time,Next State:  (10, 10.268274366284802) 1 21.0 922.331700166 (10.335411397720526, 9)
loss 436.5490417480469
Current State,action,reward,Response time,Next State:  (9, 10.335411397720526) 1 22.0 964.125321415 (10.305649118067803, 8)
loss 405.26348876953125
Current State,action,reward,Response time,Next State:  (8, 10.305649118067803) 4 20.0 973.355030047 (10.24826025489064, 10)
loss 605.1340942382812
Current State,action,reward,Response time,Next State:  (10, 10.24826025489064) 3 19.0 921.2700698 (10.276491935146446, 11)
loss 437.8086853027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 717.7020874023438
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 397.0461730957031
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 4 19.0 909.008683798 (10.369891240151098, 11)
loss 1135.5404052734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 774.8657836914062
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 432.8432922363281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 387.3621520996094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 596.864990234375
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 2 19.0 919.032945938 (10.546025383098053, 11)
loss 475.1192626953125
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 4 19.0 925.376677007 (10.655373370049301, 11)
loss 949.9808349609375
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 4 19.0 931.154858096 (10.624473674922116, 11)
loss 596.5540771484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 588.7622680664062
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 399.89215087890625
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 0 21.0 945.391786838 (11.039747673816453, 9)
loss 395.7853698730469
Current State,action,reward,Response time,Next State:  (9, 11.039747673816453) 2 21.0 1000.99384957 (11.271571944085663, 9)
loss 1241.5631103515625
Current State,action,reward,Response time,Next State:  (9, 11.271571944085663) 3 20.0 1013.12870607 (11.670334358779868, 10)
loss 1015.8240966796875
Current State,action,reward,Response time,Next State:  (10, 11.670334358779868) 3 19.0 996.702699398 (11.819721938468785, 11)
loss 1213.4154052734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 607.9520263671875
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 1 20.0 1012.73322757 (12.501496275411796, 10)
loss 400.6522216796875
Current State,action,reward,Response time,Next State:  (10, 12.501496275411796) 3 19.0 1040.79092857 (13.168618569876575, 11)
loss 771.8773193359375
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 1 20.0 1063.96010023 (13.649658108197247, 10)
loss 925.2122802734375
Action +2 not possible so Scaled up by 1
Current State,action,reward,Response time,Next State:  (10, 13.649658108197247) 4 19.0 1101.69413046 (14.283719188889453, 11)
loss 593.4639892578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 402.5203552246094
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 1 20.0 1143.69153516 (15.353965082180355, 10)
loss 1012.13525390625
Current State,action,reward,Response time,Next State:  (10, 15.353965082180355) 1 21.0 1192.09754638 (15.836943704090487, 9)
loss 399.01104736328125
Current State,action,reward,Response time,Next State:  (9, 15.836943704090487) 3 20.0 1252.10338759 (16.466876895473597, 10)
loss 913.6362915039062
Current State,action,reward,Response time,Next State:  (10, 16.466876895473597) 3 19.0 1251.130943 (16.871606159345866, 11)
loss 398.2855224609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 580.744140625
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 0 21.0 1294.6873044 (17.669285735563751, 9)
loss 762.337158203125
Current State,action,reward,Response time,Next State:  (9, 17.669285735563751) 4 19.0 1348.01745033 (17.944480812078613, 11)
loss 394.08367919921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 930.572509765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 592.5439453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 402.4163513183594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 1386.8994140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 1381.2698974609375
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 1 20.0 1390.09363446 (19.213467265587269, 10)
loss 1383.4622802734375
Action +2 not possible so Scaled up by 1
Current State,action,reward,Response time,Next State:  (10, 19.213467265587269) 4 19.0 1396.82133527 (19.140765783401285, 11)
loss 423.17816162109375
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 4 19.0 1379.54110953 (19.385636054792762, 11)
loss 1023.3425903320312
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 2 19.0 1392.48057747 (19.223969507401588, 11)
loss 490.5899353027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 1209.5166015625
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 4 19.0 1385.62570908 (19.08360399753829, 11)
loss 788.60009765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 601.8148193359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 429.7092590332031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 524.012451171875
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 2 19.0 1310.13203606 (17.229782241685768, 11)
loss 1111.418701171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 403.7707824707031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 773.2774658203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 925.905517578125
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 1 20.0 1210.97093797 (15.828704162850809, 10)
loss 1057.219482421875
Current State,action,reward,Response time,Next State:  (10, 15.828704162850809) 3 19.0 1217.27964986 (15.550833128512703, 11)
loss 399.5146179199219
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 0 21.0 1189.84140354 (15.446694946204717, 9)
loss 397.338623046875
Current State,action,reward,Response time,Next State:  (9, 15.446694946204717) 3 20.0 1231.67579099 (15.750501603468638, 10)
loss 936.344970703125
Current State,action,reward,Response time,Next State:  (10, 15.750501603468638) 0 22.0 1213.1314661 (15.817158911312735, 8)
loss 396.2714538574219
Current State,action,reward,Response time,Next State:  (8, 15.817158911312735) 2 22.0 1295.47409005 (15.829956988360925, 8)
loss 1921.87744140625
Current State,action,reward,Response time,Next State:  (8, 15.829956988360925) 3 21.0 1296.22207104 (15.892373986997768, 9)
loss 1207.6854248046875
Current State,action,reward,Response time,Next State:  (9, 15.892373986997768) 0 23.0 1255.00488935 (15.954793861767499, 7)
loss 937.2913818359375
Current State,action,reward,Response time,Next State:  (7, 15.954793861767499) 0 25.0 1370.07974724 (16.004586266677634, 5)
loss 580.9725952148438
Current State,action,reward,Response time,Next State:  (5, 16.004586266677634) 3 24.0 1728.10173237 (16.017694914042416, 6)
loss 394.0508728027344
Current State,action,reward,Response time,Next State:  (6, 16.017694914042416) 3 23.0 1456.7948918 (15.947547279389703, 7)
loss 1011.0069580078125
Current State,action,reward,Response time,Next State:  (7, 15.947547279389703) 3 22.0 1369.62816392 (16.11465619633363, 8)
loss 386.3182678222656
Current State,action,reward,Response time,Next State:  (8, 16.11465619633363) 3 21.0 1312.86125789 (16.147078378791146, 9)
loss 407.1287536621094
Current State,action,reward,Response time,Next State:  (9, 16.147078378791146) 3 20.0 1268.3374073 (16.229253414601111, 10)
loss 1016.9912719726562
Current State,action,reward,Response time,Next State:  (10, 16.229253414601111) 3 19.0 1238.52642122 (16.295120821876548, 11)
loss 679.0118408203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 393.9053955078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 760.8992309570312
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 2 19.0 1257.77263112 (16.845818065953559, 11)
loss 929.1640014648438
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 398.75482177734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 400.1974182128906
############ Running episode number: 65  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 401.92913818359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 940.0350952148438
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 385.7860412597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 1120.8577880859375
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 4 19.0 974.154538113 (11.336751742492702, 11)
loss 389.9922180175781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 402.4090881347656
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 4 19.0 962.898956888 (11.027107764209074, 11)
loss 392.7060852050781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 396.1585388183594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 400.64984130859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 2011.88232421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 931.648193359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 1210.9656982421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 397.2891540527344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 1662.041259765625
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 0 21.0 930.602776506 (10.58735855349979, 9)
loss 589.4329223632812
Current State,action,reward,Response time,Next State:  (9, 10.58735855349979) 3 20.0 977.313511661 (10.552868829802469, 10)
loss 391.7476501464844
Current State,action,reward,Response time,Next State:  (10, 10.552868829802469) 1 21.0 937.427755072 (10.553846649940214, 9)
loss 389.83624267578125
Current State,action,reward,Response time,Next State:  (9, 10.553846649940214) 2 21.0 975.559328891 (10.489125480251131, 9)
loss 441.5546569824219
Current State,action,reward,Response time,Next State:  (9, 10.489125480251131) 1 22.0 972.171495057 (10.448897752470936, 8)
loss 392.3714294433594
Current State,action,reward,Response time,Next State:  (8, 10.448897752470936) 1 23.0 981.727167119 (10.433149880183072, 7)
loss 480.085693359375
Current State,action,reward,Response time,Next State:  (7, 10.433149880183072) 4 21.0 1025.98894457 (10.44185150623065, 9)
loss 399.0652160644531
Current State,action,reward,Response time,Next State:  (9, 10.44185150623065) 3 20.0 969.696935814 (10.370942817486826, 10)
loss 402.92779541015625
Current State,action,reward,Response time,Next State:  (10, 10.370942817486826) 0 22.0 927.777654938 (10.42733414151318, 8)
loss 959.6848754882812
Current State,action,reward,Response time,Next State:  (8, 10.42733414151318) 0 24.0 980.466886297 (10.388469398680568, 6)
loss 717.6365356445312
Current State,action,reward,Response time,Next State:  (6, 10.388469398680568) 3 23.0 1084.48294874 (10.344006106602812, 7)
loss 917.793212890625
Current State,action,reward,Response time,Next State:  (7, 10.344006106602812) 4 21.0 1020.43379601 (10.319026962956018, 9)
loss 420.97174072265625
Current State,action,reward,Response time,Next State:  (9, 10.319026962956018) 2 21.0 963.267677113 (10.30224719189987, 9)
loss 779.6300048828125
Current State,action,reward,Response time,Next State:  (9, 10.30224719189987) 3 20.0 962.389338906 (10.278181486298042, 10)
loss 2118.605712890625
Current State,action,reward,Response time,Next State:  (10, 10.278181486298042) 3 19.0 922.857214352 (10.268274366284802, 11)
loss 474.1564636230469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 384.5909118652344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 1315.0347900390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 396.52215576171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 399.2615051269531
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 1 20.0 911.133954163 (10.236991269871366, 10)
loss 406.56036376953125
Current State,action,reward,Response time,Next State:  (10, 10.236991269871366) 3 19.0 920.672316722 (10.236272697871373, 11)
loss 591.062744140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 388.1581726074219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 592.2630615234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 1185.71337890625
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 0 21.0 914.152581784 (10.390165524255663, 9)
loss 407.5723876953125
Current State,action,reward,Response time,Next State:  (9, 10.390165524255663) 3 20.0 966.991429728 (10.425974763084863, 10)
loss 637.6185302734375
Current State,action,reward,Response time,Next State:  (10, 10.425974763084863) 3 19.0 930.696774523 (10.546025383098053, 11)
loss 385.3183288574219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 1014.4843139648438
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 0 21.0 931.154858096 (10.624473674922116, 9)
loss 391.2143859863281
Current State,action,reward,Response time,Next State:  (9, 10.624473674922116) 4 19.0 979.256305105 (10.771376986314287, 11)
loss 600.9291381835938
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 595.6749267578125
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 0 21.0 945.391786838 (11.039747673816453, 9)
loss 401.0600280761719
Current State,action,reward,Response time,Next State:  (9, 11.039747673816453) 1 22.0 1000.99384957 (11.271571944085663, 8)
loss 775.81689453125
Current State,action,reward,Response time,Next State:  (8, 11.271571944085663) 3 21.0 1029.8081916 (11.670334358779868, 9)
loss 1593.799560546875
Current State,action,reward,Response time,Next State:  (9, 11.670334358779868) 3 20.0 1034.00195058 (11.819721938468785, 10)
loss 398.1622314453125
Current State,action,reward,Response time,Next State:  (10, 11.819721938468785) 0 22.0 1004.62682792 (12.19918626616789, 8)
loss 958.9070434570312
Current State,action,reward,Response time,Next State:  (8, 12.19918626616789) 3 21.0 1084.02242049 (12.501496275411796, 9)
loss 386.61077880859375
Current State,action,reward,Response time,Next State:  (9, 12.501496275411796) 3 20.0 1077.50917513 (13.168618569876575, 10)
loss 928.3179321289062
Action +2 not possible so Scaled up by 1
Current State,action,reward,Response time,Next State:  (10, 13.168618569876575) 4 19.0 1076.17782493 (13.649658108197247, 11)
loss 1105.36083984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 1936.2437744140625
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 2 19.0 1122.88439768 (14.677479537099185, 11)
loss 1208.8326416015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 1118.6844482421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 590.1211547851562
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 921.0830688476562
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 387.1497802734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 1733.4251708984375
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 0 21.0 1294.6873044 (17.669285735563751, 9)
loss 1407.553955078125
Current State,action,reward,Response time,Next State:  (9, 17.669285735563751) 3 20.0 1348.01745033 (17.944480812078613, 10)
loss 937.91552734375
Action +2 not possible so Scaled up by 1
Current State,action,reward,Response time,Next State:  (10, 17.944480812078613) 4 19.0 1329.50910109 (18.385807405229915, 11)
loss 397.0487060546875
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 2 19.0 1339.64749699 (18.671267839956315, 11)
loss 593.3926391601562
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 591.0389404296875
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 4 19.0 1373.60319427 (19.286321916040979, 11)
loss 431.9961853027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 430.4264221191406
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 4 19.0 1390.09363446 (19.213467265587269, 11)
loss 398.7381896972656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 398.2122497558594
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 2 19.0 1379.54110953 (19.385636054792762, 11)
loss 783.392578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 403.3553771972656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 402.9231262207031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 582.68896484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 579.3754272460938
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 930.907470703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 409.0775146484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 397.48876953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 396.90966796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 391.21820068359375
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 2 19.0 1226.10492247 (15.950694610794756, 11)
loss 441.8846740722656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 385.9368591308594
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 1 20.0 1204.52470225 (15.550833128512703, 10)
loss 386.7544250488281
Action +2 not possible so Scaled up by 1
Current State,action,reward,Response time,Next State:  (10, 15.550833128512703) 4 19.0 1202.54023315 (15.446694946204717, 11)
loss 402.9811096191406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 587.4012451171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 398.74920654296875
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 1 20.0 1203.91462651 (15.829956988360925, 10)
loss 1414.103759765625
Current State,action,reward,Response time,Next State:  (10, 15.829956988360925) 2 20.0 1217.34610485 (15.892373986997768, 10)
loss 950.9865112304688
Current State,action,reward,Response time,Next State:  (10, 15.892373986997768) 3 19.0 1220.65695786 (15.954793861767499, 11)
loss 670.2269287109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 1022.7280883789062
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 0 21.0 1213.81868812 (16.017694914042416, 9)
loss 397.61029052734375
Current State,action,reward,Response time,Next State:  (9, 16.017694914042416) 3 20.0 1261.56482143 (15.947547279389703, 10)
loss 469.4991760253906
Current State,action,reward,Response time,Next State:  (10, 15.947547279389703) 3 19.0 1223.58357506 (16.11465619633363, 11)
loss 395.90252685546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 672.5882568359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 400.7822570800781
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 4 19.0 1225.69057988 (16.295120821876548, 11)
loss 399.8226013183594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 481.7729797363281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 589.341064453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 399.7066650390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 1138.7967529296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 488.37939453125
############ Running episode number: 66  ##############
Action +2 not possible so Scaled up by 1
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 4 19.0 1029.06659875 (11.973514343585284, 11)
loss 389.3160400390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 396.95843505859375
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 1 20.0 990.920419923 (11.61852219546234, 10)
loss 1017.50830078125
Current State,action,reward,Response time,Next State:  (10, 11.61852219546234) 3 19.0 993.95437024 (11.469111876584304, 11)
loss 389.380615234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 407.21282958984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 400.247802734375
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 4 19.0 962.898956888 (11.027107764209074, 11)
loss 769.4300537109375
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 4 19.0 950.798097136 (10.995673623987257, 11)
loss 922.4948120117188
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 2 19.0 949.137050055 (10.931193889570471, 11)
loss 399.99346923828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 672.6307373046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 587.246337890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 389.2200927734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 484.9180908203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 480.5910949707031
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 2 19.0 930.602776506 (10.58735855349979, 11)
loss 920.3179321289062
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 4 19.0 927.560809977 (10.552868829802469, 11)
loss 1592.2611083984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 388.7137145996094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 773.8402099609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 528.3380126953125
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 2 19.0 920.244245637 (10.433149880183072, 11)
loss 1016.9036254882812
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 778.9816284179688
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 1 20.0 919.871906942 (10.370942817486826, 10)
loss 1582.041015625
Action +2 not possible so Scaled up by 1
Current State,action,reward,Response time,Next State:  (10, 10.370942817486826) 4 19.0 927.777654938 (10.42733414151318, 11)
loss 597.9805297851562
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 2550.719970703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 1114.332763671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 1034.211669921875
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 4 19.0 913.381595845 (10.30224719189987, 11)
loss 406.351806640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 399.2994689941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 769.496826171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 384.8583068847656
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 0 21.0 914.247384359 (10.305649118067803, 9)
loss 391.526123046875
Current State,action,reward,Response time,Next State:  (9, 10.305649118067803) 0 23.0 962.567412952 (10.24826025489064, 7)
loss 487.272216796875
Current State,action,reward,Response time,Next State:  (7, 10.24826025489064) 0 25.0 1014.46722752 (10.276491935146446, 5)
loss 593.5306396484375
Current State,action,reward,Response time,Next State:  (5, 10.276491935146446) 4 23.0 1201.79170309 (10.236991269871366, 7)
loss 1019.2099609375
Current State,action,reward,Response time,Next State:  (7, 10.236991269871366) 0 25.0 1013.76498121 (10.236272697871373, 5)
loss 1522.2579345703125
Current State,action,reward,Response time,Next State:  (5, 10.236272697871373) 3 24.0 1198.09627024 (10.369891240151098, 6)
loss 389.6188659667969
Current State,action,reward,Response time,Next State:  (6, 10.369891240151098) 3 23.0 1083.25420594 (10.316955310454549, 7)
loss 399.2777099609375
Current State,action,reward,Response time,Next State:  (7, 10.316955310454549) 1 24.0 1018.74807882 (10.333617326102203, 6)
loss 408.4028625488281
Current State,action,reward,Response time,Next State:  (6, 10.333617326102203) 0 26.0 1080.85508169 (10.390165524255663, 4)
loss 1209.48388671875
Current State,action,reward,Response time,Next State:  (4, 10.390165524255663) 4 24.0 1365.43828638 (10.425974763084863, 6)
loss 392.7198181152344
Current State,action,reward,Response time,Next State:  (6, 10.425974763084863) 1 25.0 1086.96352001 (10.546025383098053, 5)
loss 597.7688598632812
Current State,action,reward,Response time,Next State:  (5, 10.546025383098053) 3 24.0 1226.55703492 (10.655373370049301, 6)
loss 384.06488037109375
Current State,action,reward,Response time,Next State:  (6, 10.655373370049301) 3 23.0 1102.13573879 (10.624473674922116, 7)
loss 681.10595703125
Current State,action,reward,Response time,Next State:  (7, 10.624473674922116) 1 24.0 1037.91161802 (10.771376986314287, 6)
loss 1022.027099609375
Current State,action,reward,Response time,Next State:  (6, 10.771376986314287) 3 23.0 1109.80811426 (10.924797168745895, 7)
loss 589.9425048828125
Current State,action,reward,Response time,Next State:  (7, 10.924797168745895) 3 22.0 1056.62679639 (11.039747673816453, 8)
loss 392.931396484375
Current State,action,reward,Response time,Next State:  (8, 11.039747673816453) 3 21.0 1016.25926965 (11.271571944085663, 9)
loss 1302.164306640625
Current State,action,reward,Response time,Next State:  (9, 11.271571944085663) 4 19.0 1013.12870607 (11.670334358779868, 11)
loss 487.4514465332031
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 2 19.0 984.787563682 (11.819721938468785, 11)
loss 395.7760314941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 1717.2156982421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 1293.3492431640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 385.081298828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 402.5863342285156
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 1 20.0 1089.37925646 (14.283719188889453, 10)
loss 1194.0303955078125
Current State,action,reward,Response time,Next State:  (10, 14.283719188889453) 3 19.0 1135.32732476 (14.677479537099185, 11)
loss 931.2872314453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 863.0018920898438
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 1014.3649291992188
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 400.08599853515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 1125.30859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 399.9465026855469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 1387.9805908203125
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 0 21.0 1301.78496219 (17.944480812078613, 9)
loss 398.6829833984375
Current State,action,reward,Response time,Next State:  (9, 17.944480812078613) 1 22.0 1362.4225545 (18.385807405229915, 8)
loss 399.1029968261719
Current State,action,reward,Response time,Next State:  (8, 18.385807405229915) 3 21.0 1445.59822471 (18.671267839956315, 9)
loss 398.8191833496094
Current State,action,reward,Response time,Next State:  (9, 18.671267839956315) 3 20.0 1400.46626871 (19.02839494033929, 10)
loss 395.2138671875
Action +2 not possible so Scaled up by 1
Current State,action,reward,Response time,Next State:  (10, 19.02839494033929) 4 19.0 1387.00434183 (19.286321916040979, 11)
loss 1205.91845703125
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 1 20.0 1387.23260634 (19.340464848017284, 10)
loss 847.2093505859375
Action +2 not possible so Scaled up by 1
Current State,action,reward,Response time,Next State:  (10, 19.340464848017284) 4 19.0 1403.55780672 (19.213467265587269, 11)
loss 590.7931518554688
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 2 19.0 1383.38281107 (19.140765783401285, 11)
loss 414.8366394042969
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 2 19.0 1379.54110953 (19.385636054792762, 11)
loss 487.9795837402344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 388.8556213378906
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 1 20.0 1383.93777195 (19.25591252280865, 10)
loss 788.9780883789062
Current State,action,reward,Response time,Next State:  (10, 19.25591252280865) 3 19.0 1399.0728054 (19.08360399753829, 11)
loss 590.0684814453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 590.3180541992188
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 401.9331359863281
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 1 20.0 1339.12370397 (17.82724819986867, 10)
loss 602.6085815429688
Current State,action,reward,Response time,Next State:  (10, 17.82724819986867) 3 19.0 1323.29060362 (17.229782241685768, 11)
loss 610.2536010742188
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 390.5207214355469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 398.3475646972656
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 1 20.0 1226.10492247 (15.950694610794756, 10)
loss 932.6654052734375
Current State,action,reward,Response time,Next State:  (10, 15.950694610794756) 3 19.0 1223.7505224 (15.828704162850809, 11)
loss 439.0755615234375
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 1 20.0 1204.52470225 (15.550833128512703, 10)
loss 1046.2449951171875
Current State,action,reward,Response time,Next State:  (10, 15.550833128512703) 0 22.0 1202.54023315 (15.446694946204717, 8)
loss 630.9840698242188
Current State,action,reward,Response time,Next State:  (8, 15.446694946204717) 1 23.0 1273.82239956 (15.750501603468638, 7)
loss 613.5125122070312
Current State,action,reward,Response time,Next State:  (7, 15.750501603468638) 3 22.0 1357.34892154 (15.817158911312735, 8)
loss 529.7380981445312
Current State,action,reward,Response time,Next State:  (8, 15.817158911312735) 3 21.0 1295.47409005 (15.829956988360925, 9)
loss 604.4392700195312
Current State,action,reward,Response time,Next State:  (9, 15.829956988360925) 3 20.0 1251.7376675 (15.892373986997768, 10)
loss 936.6637573242188
Current State,action,reward,Response time,Next State:  (10, 15.892373986997768) 1 21.0 1220.65695786 (15.954793861767499, 9)
loss 396.9129333496094
Current State,action,reward,Response time,Next State:  (9, 15.954793861767499) 3 20.0 1258.27226176 (16.004586266677634, 10)
loss 393.4454650878906
Current State,action,reward,Response time,Next State:  (10, 16.004586266677634) 3 19.0 1226.60915635 (16.017694914042416, 11)
loss 1382.5511474609375
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 1 20.0 1214.51137704 (15.947547279389703, 10)
loss 392.826904296875
Current State,action,reward,Response time,Next State:  (10, 15.947547279389703) 3 19.0 1223.58357506 (16.11465619633363, 11)
loss 597.3585205078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 398.4548034667969
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 2 19.0 1221.34827555 (16.229253414601111, 11)
loss 410.5152893066406
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 1 20.0 1225.69057988 (16.295120821876548, 10)
loss 1117.305419921875
Current State,action,reward,Response time,Next State:  (10, 16.295120821876548) 3 19.0 1242.02029803 (16.667936385136993, 11)
loss 1387.8487548828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 398.82861328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 432.47705078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 479.7630310058594
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 0 21.0 1269.21706044 (17.215992726625572, 9)
loss 976.442138671875
############ Running episode number: 67  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 599.0567626953125
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 1 20.0 1000.80824137 (11.786394321941378, 10)
loss 808.6996459960938
Current State,action,reward,Response time,Next State:  (10, 11.786394321941378) 0 22.0 1002.85899476 (11.61852219546234, 8)
loss 1185.4556884765625
Current State,action,reward,Response time,Next State:  (8, 11.61852219546234) 0 24.0 1050.08562792 (11.469111876584304, 6)
loss 924.4641723632812
Current State,action,reward,Response time,Next State:  (6, 11.469111876584304) 3 23.0 1155.95567613 (11.336751742492702, 7)
loss 626.758544921875
Current State,action,reward,Response time,Next State:  (7, 11.336751742492702) 1 24.0 1082.29845875 (11.25610796929319, 6)
loss 391.0166931152344
Current State,action,reward,Response time,Next State:  (6, 11.25610796929319) 4 22.0 1141.86778813 (11.027107764209074, 8)
loss 1304.725341796875
Current State,action,reward,Response time,Next State:  (8, 11.027107764209074) 2 22.0 1015.52053272 (10.995673623987257, 8)
loss 392.07427978515625
Current State,action,reward,Response time,Next State:  (8, 10.995673623987257) 2 22.0 1013.68337084 (10.931193889570471, 8)
loss 596.0939331054688
Current State,action,reward,Response time,Next State:  (8, 10.931193889570471) 2 22.0 1009.91486598 (10.816918347608043, 8)
loss 594.4216918945312
Current State,action,reward,Response time,Next State:  (8, 10.816918347608043) 1 23.0 1003.23605536 (10.819208572963639, 7)
loss 600.6953735351562
Current State,action,reward,Response time,Next State:  (7, 10.819208572963639) 0 25.0 1050.04686027 (10.768325938188134, 5)
loss 1011.6437377929688
Current State,action,reward,Response time,Next State:  (5, 10.768325938188134) 3 24.0 1246.98250365 (10.772009508959538, 6)
loss 397.3558654785156
Current State,action,reward,Response time,Next State:  (6, 10.772009508959538) 3 23.0 1109.84994875 (10.644925616761762, 7)
loss 400.3706970214844
Current State,action,reward,Response time,Next State:  (7, 10.644925616761762) 3 22.0 1039.18611617 (10.58735855349979, 8)
loss 419.2427673339844
Current State,action,reward,Response time,Next State:  (8, 10.58735855349979) 3 21.0 989.819480251 (10.552868829802469, 9)
loss 942.3717651367188
Current State,action,reward,Response time,Next State:  (9, 10.552868829802469) 3 20.0 975.508144832 (10.553846649940214, 10)
loss 601.083984375
Current State,action,reward,Response time,Next State:  (10, 10.553846649940214) 3 19.0 937.479622653 (10.489125480251131, 11)
loss 592.8052368164062
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 396.6134033203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 402.41571044921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 412.40313720703125
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 4 19.0 919.871906942 (10.370942817486826, 11)
loss 978.8606567382812
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 1 20.0 916.124940439 (10.42733414151318, 10)
loss 399.60101318359375
Action +2 not possible so Scaled up by 1
Current State,action,reward,Response time,Next State:  (10, 10.42733414151318) 4 19.0 930.768881517 (10.388469398680568, 11)
loss 434.83099365234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 415.6324157714844
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 0 21.0 914.701547126 (10.319026962956018, 9)
loss 425.7828674316406
Current State,action,reward,Response time,Next State:  (9, 10.319026962956018) 1 22.0 963.267677113 (10.30224719189987, 8)
loss 433.3443298339844
Current State,action,reward,Response time,Next State:  (8, 10.30224719189987) 4 20.0 973.15620517 (10.278181486298042, 10)
loss 482.89959716796875
Current State,action,reward,Response time,Next State:  (10, 10.278181486298042) 3 19.0 922.857214352 (10.268274366284802, 11)
loss 390.1454162597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 402.3810729980469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 395.6903381347656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 1166.8411865234375
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 2 19.0 909.642131904 (10.276491935146446, 11)
loss 603.2002563476562
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 1632.4600830078125
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 2 19.0 909.046654676 (10.236272697871373, 11)
loss 396.0313415527344
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 2 19.0 909.008683798 (10.369891240151098, 11)
loss 1019.9742431640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 404.9374694824219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 407.53314208984375
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 0 21.0 914.152581784 (10.390165524255663, 9)
loss 408.5887451171875
Current State,action,reward,Response time,Next State:  (9, 10.390165524255663) 3 20.0 966.991429728 (10.425974763084863, 10)
loss 1250.060791015625
Current State,action,reward,Response time,Next State:  (10, 10.425974763084863) 3 19.0 930.696774523 (10.546025383098053, 11)
loss 397.4305725097656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 399.5255432128906
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 2 19.0 931.154858096 (10.624473674922116, 11)
loss 413.67205810546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 1023.1734008789062
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 391.3538513183594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 1400.2666015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 924.0842895507812
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 406.9031982421875
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 1 20.0 984.787563682 (11.819721938468785, 10)
loss 1922.7462158203125
Current State,action,reward,Response time,Next State:  (10, 11.819721938468785) 3 19.0 1004.62682792 (12.19918626616789, 11)
loss 451.8756103515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 1214.7772216796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 402.8779602050781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 599.8953857421875
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 0 21.0 1089.37925646 (14.283719188889453, 9)
loss 1031.4951171875
Current State,action,reward,Response time,Next State:  (9, 14.283719188889453) 0 23.0 1170.79974938 (14.677479537099185, 7)
loss 385.3029479980469
Current State,action,reward,Response time,Next State:  (7, 14.677479537099185) 3 22.0 1290.48169407 (15.353965082180355, 8)
loss 811.3214111328125
Current State,action,reward,Response time,Next State:  (8, 15.353965082180355) 4 20.0 1268.40282167 (15.836943704090487, 10)
loss 401.7318420410156
Current State,action,reward,Response time,Next State:  (10, 15.836943704090487) 2 20.0 1217.71670884 (16.466876895473597, 10)
loss 818.420166015625
Current State,action,reward,Response time,Next State:  (10, 16.466876895473597) 2 20.0 1251.130943 (16.871606159345866, 10)
loss 587.7425537109375
Current State,action,reward,Response time,Next State:  (10, 16.871606159345866) 0 22.0 1272.5994393 (17.534967586021782, 8)
loss 1211.59375
Current State,action,reward,Response time,Next State:  (8, 17.534967586021782) 3 21.0 1395.8710659 (17.669285735563751, 9)
loss 1816.8194580078125
Current State,action,reward,Response time,Next State:  (9, 17.669285735563751) 3 20.0 1348.01745033 (17.944480812078613, 10)
loss 599.2730102539062
Current State,action,reward,Response time,Next State:  (10, 17.944480812078613) 3 19.0 1329.50910109 (18.385807405229915, 11)
loss 1208.41162109375
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 4 19.0 1339.64749699 (18.671267839956315, 11)
loss 401.6636962890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 391.26483154296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 587.9911499023438
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 0 21.0 1387.23260634 (19.340464848017284, 9)
loss 401.0369873046875
Current State,action,reward,Response time,Next State:  (9, 19.340464848017284) 3 20.0 1435.4954296 (19.213467265587269, 10)
loss 1034.676513671875
Action +2 not possible so Scaled up by 1
Current State,action,reward,Response time,Next State:  (10, 19.213467265587269) 4 19.0 1396.82133527 (19.140765783401285, 11)
loss 595.6837158203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 404.03521728515625
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 1 20.0 1392.48057747 (19.223969507401588, 10)
loss 1069.6025390625
Current State,action,reward,Response time,Next State:  (10, 19.223969507401588) 1 21.0 1397.37841716 (19.25591252280865, 9)
loss 1308.1837158203125
Current State,action,reward,Response time,Next State:  (9, 19.25591252280865) 0 23.0 1431.06953264 (19.08360399753829, 7)
loss 393.5805358886719
Current State,action,reward,Response time,Next State:  (7, 19.08360399753829) 1 24.0 1565.05696683 (18.668181536495972, 6)
loss 594.75927734375
Current State,action,reward,Response time,Next State:  (6, 18.668181536495972) 4 22.0 1632.09570747 (18.375894992990247, 8)
loss 399.96783447265625
Current State,action,reward,Response time,Next State:  (8, 18.375894992990247) 3 21.0 1445.01889581 (17.82724819986867, 9)
loss 600.675048828125
Current State,action,reward,Response time,Next State:  (9, 17.82724819986867) 4 19.0 1356.28600579 (17.229782241685768, 11)
loss 609.3649291992188
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 0 21.0 1278.56065924 (16.84211602880065, 9)
loss 389.8124084472656
Current State,action,reward,Response time,Next State:  (9, 16.84211602880065) 2 21.0 1304.71919827 (16.237094554670044, 9)
loss 1034.69384765625
Current State,action,reward,Response time,Next State:  (9, 16.237094554670044) 0 23.0 1273.04930988 (15.950694610794756, 7)
loss 399.2445983886719
Current State,action,reward,Response time,Next State:  (7, 15.950694610794756) 0 25.0 1369.82429532 (15.828704162850809, 5)
loss 404.087890625
Current State,action,reward,Response time,Next State:  (5, 15.828704162850809) 1 26.0 1711.94129403 (15.550833128512703, 4)
loss 615.2340087890625
Current State,action,reward,Response time,Next State:  (4, 15.550833128512703) 1 -66.8343809505 1951.71278156 (15.446694946204717, 3)
loss 1026.8525390625
Current State,action,reward,Response time,Next State:  (3, 15.446694946204717) 1 -227.932104766 2938.3438095 (15.750501603468638, 2)
loss 1072.912109375
Action -2 not possible so Scaled down by 0
Current State,action,reward,Response time,Next State:  (2, 15.750501603468638) 0 -230.005033789 4559.32104766 (15.817158911312735, 2)
loss 1072.252685546875
Action -2 not possible so Scaled down by 0
Current State,action,reward,Response time,Next State:  (2, 15.817158911312735) 1 -230.403032241 4580.05033789 (15.829956988360925, 2)
loss 408.6783447265625
Action -2 not possible so Scaled down by 0
Current State,action,reward,Response time,Next State:  (2, 15.829956988360925) 1 -232.344094852 4584.03032241 (15.892373986997768, 2)
loss 576.4276123046875
Current State,action,reward,Response time,Next State:  (2, 15.892373986997768) 2 -234.285246906 4603.44094852 (15.954793861767499, 2)
loss 1813.4013671875
Action -2 not possible so Scaled down by 0
Current State,action,reward,Response time,Next State:  (2, 15.954793861767499) 1 -235.833706102 4622.85246906 (16.004586266677634, 2)
loss 1303.7843017578125
Current State,action,reward,Response time,Next State:  (2, 16.004586266677634) 4 25.5249670084 4638.33706102 (16.017694914042416, 4)
loss 401.1626892089844
Current State,action,reward,Response time,Next State:  (4, 16.017694914042416) 3 25.0 2004.75032992 (15.947547279389703, 5)
loss 978.8965454101562
Current State,action,reward,Response time,Next State:  (5, 15.947547279389703) 3 24.0 1722.86086353 (16.11465619633363, 6)
loss 1666.4964599609375
Current State,action,reward,Response time,Next State:  (6, 16.11465619633363) 4 22.0 1463.20782432 (16.147078378791146, 8)
loss 1037.4005126953125
Current State,action,reward,Response time,Next State:  (8, 16.147078378791146) 3 21.0 1314.7561657 (16.229253414601111, 9)
loss 437.32794189453125
Current State,action,reward,Response time,Next State:  (9, 16.229253414601111) 3 20.0 1272.63886489 (16.295120821876548, 10)
loss 785.4064331054688
Current State,action,reward,Response time,Next State:  (10, 16.295120821876548) 3 19.0 1242.02029803 (16.667936385136993, 11)
loss 403.4808349609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 962.1004638671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 1034.059326171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 392.1409606933594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 933.984375
############ Running episode number: 68  ##############
Action +2 not possible so Scaled up by 1
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 4 19.0 1029.06659875 (11.973514343585284, 11)
loss 2486.7822265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 3002.669921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 930.2767333984375
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 1 20.0 982.049698353 (11.469111876584304, 10)
loss 1583.3892822265625
Current State,action,reward,Response time,Next State:  (10, 11.469111876584304) 0 22.0 986.02903554 (11.336751742492702, 8)
loss 492.9854431152344
Current State,action,reward,Response time,Next State:  (8, 11.336751742492702) 3 21.0 1033.61761156 (11.25610796929319, 9)
loss 1554.87744140625
Current State,action,reward,Response time,Next State:  (9, 11.25610796929319) 3 20.0 1012.3192433 (11.027107764209074, 10)
loss 1211.2506103515625
Current State,action,reward,Response time,Next State:  (10, 11.027107764209074) 3 19.0 962.583328739 (10.995673623987257, 11)
loss 607.22900390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 1120.2169189453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 933.9273681640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 786.8583984375
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 1 20.0 939.812260006 (10.768325938188134, 10)
loss 1200.7506103515625
Current State,action,reward,Response time,Next State:  (10, 10.768325938188134) 3 19.0 948.856481751 (10.772009508959538, 11)
loss 401.2976989746094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 852.1906127929688
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 4 19.0 930.602776506 (10.58735855349979, 11)
loss 1238.6126708984375
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 0 21.0 927.560809977 (10.552868829802469, 9)
loss 595.0211791992188
Current State,action,reward,Response time,Next State:  (9, 10.552868829802469) 3 20.0 975.508144832 (10.553846649940214, 10)
loss 1181.7449951171875
Current State,action,reward,Response time,Next State:  (10, 10.553846649940214) 2 20.0 937.479622653 (10.489125480251131, 10)
loss 405.1750183105469
Current State,action,reward,Response time,Next State:  (10, 10.489125480251131) 1 21.0 934.046546974 (10.448897752470936, 9)
loss 396.2309875488281
Current State,action,reward,Response time,Next State:  (9, 10.448897752470936) 3 20.0 970.065772031 (10.433149880183072, 10)
loss 1194.9658203125
Current State,action,reward,Response time,Next State:  (10, 10.433149880183072) 3 19.0 931.077372094 (10.44185150623065, 11)
loss 1148.3392333984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 408.42828369140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 963.0283813476562
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 4 19.0 919.104778396 (10.388469398680568, 11)
loss 949.9038696289062
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 2087.67431640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 927.1224365234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 399.53076171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 917.9566040039062
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 943.688720703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 481.1174011230469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 1956.8409423828125
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 4 19.0 912.67468196 (10.24826025489064, 11)
loss 399.37506103515625
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 4 19.0 909.642131904 (10.276491935146446, 11)
loss 2291.52880859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 1824.466064453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 447.6459045410156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 411.91314697265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 928.7903442382812
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 828.7951049804688
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 946.9873046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 1026.2874755859375
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 0 21.0 919.032945938 (10.546025383098053, 9)
loss 1099.6441650390625
Current State,action,reward,Response time,Next State:  (9, 10.546025383098053) 4 19.0 975.14992417 (10.655373370049301, 11)
loss 837.4028930664062
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 603.710205078125
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 2 19.0 929.522052234 (10.771376986314287, 11)
loss 941.21240234375
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 1 20.0 937.284736847 (10.924797168745895, 10)
loss 1961.7569580078125
Current State,action,reward,Response time,Next State:  (10, 10.924797168745895) 1 21.0 957.1563561 (11.039747673816453, 9)
loss 1049.9637451171875
Current State,action,reward,Response time,Next State:  (9, 11.039747673816453) 3 20.0 1000.99384957 (11.271571944085663, 10)
loss 407.1310119628906
Current State,action,reward,Response time,Next State:  (10, 11.271571944085663) 3 19.0 975.550709187 (11.670334358779868, 11)
loss 1495.9229736328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 441.4233093261719
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 4 19.0 992.681522335 (12.19918626616789, 11)
loss 396.0013427734375
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 2 19.0 1012.73322757 (12.501496275411796, 11)
loss 596.5206909179688
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 2 19.0 1028.70793389 (13.168618569876575, 11)
loss 1076.4404296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 590.8439331054688
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 1157.841064453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 788.7445678710938
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 804.470947265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 1124.2918701171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 395.621826171875
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 1 20.0 1238.24711194 (16.871606159345866, 10)
loss 1294.0477294921875
Current State,action,reward,Response time,Next State:  (10, 16.871606159345866) 3 19.0 1272.5994393 (17.534967586021782, 11)
loss 589.6621704101562
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 1214.259765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 1722.3848876953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 636.82763671875
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 1 20.0 1339.64749699 (18.671267839956315, 10)
loss 390.4956970214844
Current State,action,reward,Response time,Next State:  (10, 18.671267839956315) 3 19.0 1368.06085906 (19.02839494033929, 11)
loss 640.96728515625
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 4 19.0 1373.60319427 (19.286321916040979, 11)
loss 977.701171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 392.2060241699219
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 1 20.0 1390.09363446 (19.213467265587269, 10)
loss 923.5696411132812
Current State,action,reward,Response time,Next State:  (10, 19.213467265587269) 3 19.0 1396.82133527 (19.140765783401285, 11)
loss 398.0982971191406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 441.3999328613281
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 0 21.0 1392.48057747 (19.223969507401588, 9)
loss 2192.23291015625
Current State,action,reward,Response time,Next State:  (9, 19.223969507401588) 3 20.0 1429.39747342 (19.25591252280865, 10)
loss 1653.4173583984375
Current State,action,reward,Response time,Next State:  (10, 19.25591252280865) 3 19.0 1399.0728054 (19.08360399753829, 11)
loss 1198.782958984375
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 2 19.0 1376.52055872 (18.668181536495972, 11)
loss 1117.9078369140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 913.0968627929688
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 1 20.0 1339.12370397 (17.82724819986867, 10)
loss 402.8904724121094
Current State,action,reward,Response time,Next State:  (10, 17.82724819986867) 2 20.0 1323.29060362 (17.229782241685768, 10)
loss 1390.5616455078125
Current State,action,reward,Response time,Next State:  (10, 17.229782241685768) 0 22.0 1291.59856437 (16.84211602880065, 8)
loss 2090.069580078125
Current State,action,reward,Response time,Next State:  (8, 16.84211602880065) 4 20.0 1355.37749867 (16.237094554670044, 10)
loss 934.642578125
Current State,action,reward,Response time,Next State:  (10, 16.237094554670044) 3 19.0 1238.94234737 (15.950694610794756, 11)
loss 641.7630004882812
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 2 19.0 1210.97093797 (15.828704162850809, 11)
loss 1478.893310546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 394.5516662597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 1815.7655029296875
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 1 20.0 1184.33851965 (15.750501603468638, 10)
loss 922.9933471679688
Current State,action,reward,Response time,Next State:  (10, 15.750501603468638) 3 19.0 1213.1314661 (15.817158911312735, 11)
loss 929.2042846679688
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 1461.3031005859375
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 4 19.0 1204.59090422 (15.892373986997768, 11)
loss 434.17376708984375
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 1 20.0 1207.88915169 (15.954793861767499, 10)
loss 497.34710693359375
Current State,action,reward,Response time,Next State:  (10, 15.954793861767499) 3 19.0 1223.96796344 (16.004586266677634, 11)
loss 777.7058715820312
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 940.65625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 409.2936706542969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 1321.677978515625
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 2 19.0 1219.63501821 (16.147078378791146, 11)
loss 1117.85986328125
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 4 19.0 1221.34827555 (16.229253414601111, 11)
loss 1341.391357421875
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 4 19.0 1225.69057988 (16.295120821876548, 11)
loss 927.4736938476562
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 405.9126892089844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 1991.543212890625
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 0 21.0 1257.77263112 (16.845818065953559, 9)
loss 987.3671264648438
Current State,action,reward,Response time,Next State:  (9, 16.845818065953559) 0 23.0 1304.91298164 (17.052961248403161, 7)
loss 940.0818481445312
Current State,action,reward,Response time,Next State:  (7, 17.052961248403161) 0 25.0 1438.51394887 (17.215992726625572, 5)
loss 984.638916015625
############ Running episode number: 69  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 972.1373901367188
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 4 19.0 1000.80824137 (11.786394321941378, 11)
loss 2208.59619140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 2646.457763671875
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 4 19.0 982.049698353 (11.469111876584304, 11)
loss 403.8526306152344
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 1 20.0 974.154538113 (11.336751742492702, 10)
loss 1717.1153564453125
Current State,action,reward,Response time,Next State:  (10, 11.336751742492702) 3 19.0 979.00811241 (11.25610796929319, 11)
loss 1442.7103271484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 450.017333984375
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 2 19.0 950.798097136 (10.995673623987257, 11)
loss 1816.896728515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 1028.923583984375
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 4 19.0 945.729803224 (10.816918347608043, 11)
loss 1424.0029296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 931.2787475585938
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 1012.0509643554688
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 4 19.0 937.12351295 (10.772009508959538, 11)
loss 932.4483032226562
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 1 20.0 937.318160694 (10.644925616761762, 10)
loss 407.8446350097656
Current State,action,reward,Response time,Next State:  (10, 10.644925616761762) 3 19.0 942.310823749 (10.58735855349979, 11)
loss 391.8780822753906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 590.9168090820312
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 1467.0181884765625
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 1 20.0 925.789969445 (10.489125480251131, 10)
loss 589.5128784179688
Current State,action,reward,Response time,Next State:  (10, 10.489125480251131) 3 19.0 934.046546974 (10.448897752470936, 11)
loss 398.9792785644531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 1510.72509765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 795.4165649414062
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 588.30615234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 946.5872802734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 404.2047119140625
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 2 19.0 917.051082408 (10.344006106602812, 11)
loss 790.4725952148438
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 1 20.0 914.701547126 (10.319026962956018, 10)
loss 1749.3961181640625
Current State,action,reward,Response time,Next State:  (10, 10.319026962956018) 3 19.0 925.023825574 (10.30224719189987, 11)
loss 416.8891906738281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 776.3809814453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 1749.331298828125
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 2 19.0 910.69972028 (10.335411397720526, 11)
loss 1960.86083984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 1147.5596923828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 1556.4078369140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 390.011474609375
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 1 20.0 911.133954163 (10.236991269871366, 10)
loss 1725.6456298828125
Current State,action,reward,Response time,Next State:  (10, 10.236991269871366) 1 21.0 920.672316722 (10.236272697871373, 9)
loss 1079.2734375
Current State,action,reward,Response time,Next State:  (9, 10.236272697871373) 2 21.0 958.935899728 (10.369891240151098, 9)
loss 1548.107421875
Current State,action,reward,Response time,Next State:  (9, 10.369891240151098) 3 20.0 965.930171009 (10.316955310454549, 10)
loss 1145.34619140625
Action +2 not possible so Scaled up by 1
Current State,action,reward,Response time,Next State:  (10, 10.316955310454549) 4 19.0 924.913936648 (10.333617326102203, 11)
loss 947.4049682617188
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 1 20.0 914.152581784 (10.390165524255663, 10)
loss 408.220703125
Action +2 not possible so Scaled up by 1
Current State,action,reward,Response time,Next State:  (10, 10.390165524255663) 4 19.0 928.797305964 (10.425974763084863, 11)
loss 952.5931396484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 980.9362182617188
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 439.4937438964844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 915.9443359375
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 1 20.0 929.522052234 (10.771376986314287, 10)
loss 1143.7689208984375
Current State,action,reward,Response time,Next State:  (10, 10.771376986314287) 3 19.0 949.018321829 (10.924797168745895, 11)
loss 934.60009765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 590.6329956054688
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 4 19.0 951.466016946 (11.271571944085663, 11)
loss 2019.049072265625
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 2 19.0 963.716106332 (11.670334358779868, 11)
loss 437.11566162109375
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 0 21.0 984.787563682 (11.819721938468785, 9)
loss 390.8760986328125
Current State,action,reward,Response time,Next State:  (9, 11.819721938468785) 3 20.0 1041.82165315 (12.19918626616789, 10)
loss 395.6811828613281
Current State,action,reward,Response time,Next State:  (10, 12.19918626616789) 3 19.0 1024.75516863 (12.501496275411796, 11)
loss 915.8663330078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 1643.9881591796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 1462.7811279296875
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 4 19.0 1089.37925646 (14.283719188889453, 11)
loss 396.7172546386719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 1222.0062255859375
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 1 20.0 1143.69153516 (15.353965082180355, 10)
loss 909.7964477539062
Current State,action,reward,Response time,Next State:  (10, 15.353965082180355) 1 21.0 1192.09754638 (15.836943704090487, 9)
loss 1192.4439697265625
Current State,action,reward,Response time,Next State:  (9, 15.836943704090487) 3 20.0 1252.10338759 (16.466876895473597, 10)
loss 1496.48388671875
Current State,action,reward,Response time,Next State:  (10, 16.466876895473597) 3 19.0 1251.130943 (16.871606159345866, 11)
loss 389.5450744628906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 1200.811767578125
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 4 19.0 1294.6873044 (17.669285735563751, 11)
loss 431.1703186035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 933.2174072265625
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 1 20.0 1316.32685758 (18.385807405229915, 10)
loss 1148.8707275390625
Current State,action,reward,Response time,Next State:  (10, 18.385807405229915) 3 19.0 1352.9188695 (18.671267839956315, 11)
loss 388.02947998046875
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 2 19.0 1354.73183582 (19.02839494033929, 11)
loss 399.8247375488281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 408.1708679199219
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 4 19.0 1387.23260634 (19.340464848017284, 11)
loss 921.5350341796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 1471.113037109375
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 1 20.0 1383.38281107 (19.140765783401285, 10)
loss 433.0099182128906
Current State,action,reward,Response time,Next State:  (10, 19.140765783401285) 3 19.0 1392.96495117 (19.385636054792762, 11)
loss 1432.12109375
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 4 19.0 1392.48057747 (19.223969507401588, 11)
loss 397.1398010253906
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 2 19.0 1383.93777195 (19.25591252280865, 11)
loss 1473.83544921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 1997.244873046875
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 0 21.0 1376.52055872 (18.668181536495972, 9)
loss 958.0410766601562
Current State,action,reward,Response time,Next State:  (9, 18.668181536495972) 3 20.0 1400.30471596 (18.375894992990247, 10)
loss 394.5262756347656
Current State,action,reward,Response time,Next State:  (10, 18.375894992990247) 1 21.0 1352.39307459 (17.82724819986867, 9)
loss 403.3918151855469
Current State,action,reward,Response time,Next State:  (9, 17.82724819986867) 3 20.0 1356.28600579 (17.229782241685768, 10)
loss 403.7513427734375
Current State,action,reward,Response time,Next State:  (10, 17.229782241685768) 0 22.0 1291.59856437 (16.84211602880065, 8)
loss 403.8758239746094
Current State,action,reward,Response time,Next State:  (8, 16.84211602880065) 3 21.0 1355.37749867 (16.237094554670044, 9)
loss 918.4072265625
Current State,action,reward,Response time,Next State:  (9, 16.237094554670044) 1 22.0 1273.04930988 (15.950694610794756, 8)
loss 1515.309326171875
Current State,action,reward,Response time,Next State:  (8, 15.950694610794756) 3 21.0 1303.27855664 (15.828704162850809, 9)
loss 404.0356750488281
Current State,action,reward,Response time,Next State:  (9, 15.828704162850809) 3 20.0 1251.67208827 (15.550833128512703, 10)
loss 397.7154846191406
Action +2 not possible so Scaled up by 1
Current State,action,reward,Response time,Next State:  (10, 15.550833128512703) 4 19.0 1202.54023315 (15.446694946204717, 11)
loss 909.30322265625
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 4 19.0 1184.33851965 (15.750501603468638, 11)
loss 987.9168090820312
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 939.352294921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 394.2695007324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 990.8356323242188
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 905.1770629882812
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 914.0926513671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 398.4466857910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 933.3348388671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 946.5909423828125
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 2 19.0 1219.63501821 (16.147078378791146, 11)
loss 409.3923034667969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 405.2339782714844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 433.55450439453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 909.5771484375
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 0 21.0 1248.87152463 (16.836383524612351, 9)
loss 440.78778076171875
Current State,action,reward,Response time,Next State:  (9, 16.836383524612351) 4 19.0 1304.41912996 (16.845818065953559, 11)
loss 389.4380798339844
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 1 20.0 1258.27117243 (17.052961248403161, 10)
loss 1478.8333740234375
Current State,action,reward,Response time,Next State:  (10, 17.052961248403161) 3 19.0 1282.21925533 (17.215992726625572, 11)
loss 952.1937866210938
############ Running episode number: 70  ##############
Action +2 not possible so Scaled up by 1
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 4 19.0 1029.06659875 (11.973514343585284, 11)
loss 932.7939453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 939.4415893554688
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 917.89794921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 398.5575256347656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 969.54052734375
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 4 19.0 967.160346038 (11.25610796929319, 11)
loss 1481.1741943359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 389.6042175292969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 918.6187744140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 924.8504028320312
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 1 20.0 945.729803224 (10.816918347608043, 10)
loss 919.9036254882812
Current State,action,reward,Response time,Next State:  (10, 10.816918347608043) 2 20.0 951.434021987 (10.819208572963639, 10)
loss 942.9924926757812
Current State,action,reward,Response time,Next State:  (10, 10.819208572963639) 3 19.0 951.555504911 (10.768325938188134, 11)
loss 938.283447265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 395.435302734375
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 0 21.0 937.318160694 (10.644925616761762, 9)
loss 1447.6983642578125
Current State,action,reward,Response time,Next State:  (9, 10.644925616761762) 3 20.0 980.32686333 (10.58735855349979, 10)
loss 1453.5592041015625
Current State,action,reward,Response time,Next State:  (10, 10.58735855349979) 3 19.0 939.257231149 (10.552868829802469, 11)
loss 930.7672729492188
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 388.230712890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 950.1255493164062
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 1 20.0 922.369964659 (10.448897752470936, 10)
loss 939.7600708007812
Current State,action,reward,Response time,Next State:  (10, 10.448897752470936) 3 19.0 931.912703681 (10.433149880183072, 11)
loss 921.5287475585938
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 4 19.0 919.412094444 (10.44185150623065, 11)
loss 927.7974243164062
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 406.1808776855469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 402.97607421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 918.1339111328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 398.9808044433594
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 2 19.0 914.701547126 (10.319026962956018, 11)
loss 1460.573486328125
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 4 19.0 913.381595845 (10.30224719189987, 11)
loss 405.6016540527344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 930.6787109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 406.384765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 1441.090087890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 922.0084228515625
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 4 19.0 912.67468196 (10.24826025489064, 11)
loss 918.812255859375
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 2 19.0 909.642131904 (10.276491935146446, 11)
loss 903.8451538085938
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 1 20.0 911.133954163 (10.236991269871366, 10)
loss 396.7648010253906
Current State,action,reward,Response time,Next State:  (10, 10.236991269871366) 3 19.0 920.672316722 (10.236272697871373, 11)
loss 402.9687194824219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 404.9574890136719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 943.269775390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 911.852783203125
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 1 20.0 914.152581784 (10.390165524255663, 10)
loss 925.0651245117188
Current State,action,reward,Response time,Next State:  (10, 10.390165524255663) 3 19.0 928.797305964 (10.425974763084863, 11)
loss 910.2379150390625
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 0 21.0 919.032945938 (10.546025383098053, 9)
loss 953.271240234375
Current State,action,reward,Response time,Next State:  (9, 10.546025383098053) 3 20.0 975.14992417 (10.655373370049301, 10)
loss 393.84429931640625
Current State,action,reward,Response time,Next State:  (10, 10.655373370049301) 2 20.0 942.865015335 (10.624473674922116, 10)
loss 401.1870422363281
Current State,action,reward,Response time,Next State:  (10, 10.624473674922116) 3 19.0 941.225969064 (10.771376986314287, 11)
loss 938.5398559570312
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 1461.146728515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 404.4944763183594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 402.400390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 390.83172607421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 403.5564880371094
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 4 19.0 992.681522335 (12.19918626616789, 11)
loss 928.5530395507812
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 393.2622985839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 915.5514526367188
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 1 20.0 1063.96010023 (13.649658108197247, 10)
loss 388.6066589355469
Current State,action,reward,Response time,Next State:  (10, 13.649658108197247) 3 19.0 1101.69413046 (14.283719188889453, 11)
loss 400.213623046875
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 1 20.0 1122.88439768 (14.677479537099185, 10)
loss 404.1589660644531
Current State,action,reward,Response time,Next State:  (10, 14.677479537099185) 3 19.0 1156.21398489 (15.353965082180355, 11)
loss 947.400634765625
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 1 20.0 1179.43847566 (15.836943704090487, 10)
loss 925.7244262695312
Action +2 not possible so Scaled up by 1
Current State,action,reward,Response time,Next State:  (10, 15.836943704090487) 4 19.0 1217.71670884 (16.466876895473597, 11)
loss 385.2078552246094
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 2 19.0 1238.24711194 (16.871606159345866, 11)
loss 439.6996765136719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 393.4843444824219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 940.2119750976562
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 959.5203247070312
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 388.80303955078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 404.1861572265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 962.6453247070312
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 0 21.0 1373.60319427 (19.286321916040979, 9)
loss 912.4108276367188
Current State,action,reward,Response time,Next State:  (9, 19.286321916040979) 4 19.0 1432.66131431 (19.340464848017284, 11)
loss 955.739990234375
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 1 20.0 1390.09363446 (19.213467265587269, 10)
loss 386.4603576660156
Current State,action,reward,Response time,Next State:  (10, 19.213467265587269) 3 19.0 1396.82133527 (19.140765783401285, 11)
loss 921.095458984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 388.56787109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 1450.02685546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 396.8136901855469
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 4 19.0 1385.62570908 (19.08360399753829, 11)
loss 925.9509887695312
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 387.7327880859375
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 0 21.0 1354.56874896 (18.375894992990247, 9)
loss 404.0026550292969
Current State,action,reward,Response time,Next State:  (9, 18.375894992990247) 2 21.0 1385.00495784 (17.82724819986867, 9)
loss 395.3924865722656
Current State,action,reward,Response time,Next State:  (9, 17.82724819986867) 3 20.0 1356.28600579 (17.229782241685768, 10)
loss 399.3454284667969
Current State,action,reward,Response time,Next State:  (10, 17.229782241685768) 1 21.0 1291.59856437 (16.84211602880065, 9)
loss 947.1461791992188
Current State,action,reward,Response time,Next State:  (9, 16.84211602880065) 3 20.0 1304.71919827 (16.237094554670044, 10)
loss 920.1201782226562
Current State,action,reward,Response time,Next State:  (10, 16.237094554670044) 0 22.0 1238.94234737 (15.950694610794756, 8)
loss 394.97662353515625
Current State,action,reward,Response time,Next State:  (8, 15.950694610794756) 3 21.0 1303.27855664 (15.828704162850809, 9)
loss 918.4166259765625
Current State,action,reward,Response time,Next State:  (9, 15.828704162850809) 3 20.0 1251.67208827 (15.550833128512703, 10)
loss 391.35040283203125
Current State,action,reward,Response time,Next State:  (10, 15.550833128512703) 3 19.0 1202.54023315 (15.446694946204717, 11)
loss 397.21661376953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 393.7964172363281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 935.1181640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 400.5202331542969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 987.5216674804688
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 434.32421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 396.80462646484375
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 1 20.0 1213.81868812 (16.017694914042416, 10)
loss 409.5066223144531
Current State,action,reward,Response time,Next State:  (10, 16.017694914042416) 3 19.0 1227.30449265 (15.947547279389703, 11)
loss 1470.7801513671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 396.9051818847656
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 1 20.0 1219.63501821 (16.147078378791146, 10)
loss 943.8242797851562
Current State,action,reward,Response time,Next State:  (10, 16.147078378791146) 3 19.0 1234.16752106 (16.229253414601111, 11)
loss 391.548095703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 436.2059020996094
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 0 21.0 1229.17115431 (16.667936385136993, 9)
loss 930.1583862304688
Current State,action,reward,Response time,Next State:  (9, 16.667936385136993) 0 23.0 1295.6017535 (16.836383524612351, 7)
loss 386.2216796875
Current State,action,reward,Response time,Next State:  (7, 16.836383524612351) 3 22.0 1425.01753312 (16.845818065953559, 8)
loss 1478.7764892578125
Current State,action,reward,Response time,Next State:  (8, 16.845818065953559) 3 21.0 1355.59386347 (17.052961248403161, 9)
loss 401.3580017089844
Current State,action,reward,Response time,Next State:  (9, 17.052961248403161) 3 20.0 1315.75590499 (17.215992726625572, 10)
loss 910.810302734375
############ Running episode number: 71  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 1 21.0 1029.06659875 (11.973514343585284, 9)
loss 381.245361328125
Current State,action,reward,Response time,Next State:  (9, 11.973514343585284) 3 20.0 1049.87192659 (11.786394321941378, 10)
loss 1994.5103759765625
Current State,action,reward,Response time,Next State:  (10, 11.786394321941378) 3 19.0 1002.85899476 (11.61852219546234, 11)
loss 393.7653503417969
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 0 21.0 982.049698353 (11.469111876584304, 9)
loss 931.6760864257812
Current State,action,reward,Response time,Next State:  (9, 11.469111876584304) 2 21.0 1023.46894667 (11.336751742492702, 9)
loss 407.294677734375
Current State,action,reward,Response time,Next State:  (9, 11.336751742492702) 0 23.0 1016.54054685 (11.25610796929319, 7)
loss 396.3520812988281
Current State,action,reward,Response time,Next State:  (7, 11.25610796929319) 3 22.0 1077.27300243 (11.027107764209074, 8)
loss 1474.228271484375
Current State,action,reward,Response time,Next State:  (8, 11.027107764209074) 1 23.0 1015.52053272 (10.995673623987257, 7)
loss 401.73876953125
Current State,action,reward,Response time,Next State:  (7, 10.995673623987257) 3 22.0 1061.04358539 (10.931193889570471, 8)
loss 396.0091552734375
Current State,action,reward,Response time,Next State:  (8, 10.931193889570471) 3 21.0 1009.91486598 (10.816918347608043, 9)
loss 1470.5267333984375
Current State,action,reward,Response time,Next State:  (9, 10.816918347608043) 4 19.0 989.329834005 (10.819208572963639, 11)
loss 391.85968017578125
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 0 21.0 939.812260006 (10.768325938188134, 9)
loss 408.5572814941406
Current State,action,reward,Response time,Next State:  (9, 10.768325938188134) 4 19.0 986.786261176 (10.772009508959538, 11)
loss 397.2530212402344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 397.1881103515625
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 4 19.0 930.602776506 (10.58735855349979, 11)
loss 398.9307861328125
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 2 19.0 927.560809977 (10.552868829802469, 11)
loss 395.1517639160156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 946.82763671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 1440.1531982421875
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 0 21.0 922.369964659 (10.448897752470936, 9)
loss 1446.725341796875
Current State,action,reward,Response time,Next State:  (9, 10.448897752470936) 1 22.0 970.065772031 (10.433149880183072, 8)
loss 443.9265441894531
Current State,action,reward,Response time,Next State:  (8, 10.433149880183072) 2 22.0 980.806785952 (10.44185150623065, 8)
loss 397.089111328125
Current State,action,reward,Response time,Next State:  (8, 10.44185150623065) 3 21.0 981.315350702 (10.370942817486826, 9)
loss 393.7108459472656
Current State,action,reward,Response time,Next State:  (9, 10.370942817486826) 2 21.0 965.985215893 (10.42733414151318, 9)
loss 960.6661987304688
Current State,action,reward,Response time,Next State:  (9, 10.42733414151318) 3 20.0 968.937023414 (10.388469398680568, 10)
loss 934.201904296875
Current State,action,reward,Response time,Next State:  (10, 10.388469398680568) 3 19.0 928.707336523 (10.344006106602812, 11)
loss 1468.276611328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 396.9227294921875
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 4 19.0 913.381595845 (10.30224719189987, 11)
loss 1008.3621215820312
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 402.6943664550781
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 2 19.0 911.223233653 (10.268274366284802, 11)
loss 925.9900512695312
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 0 21.0 910.69972028 (10.335411397720526, 9)
loss 1436.0611572265625
Current State,action,reward,Response time,Next State:  (9, 10.335411397720526) 3 20.0 964.125321415 (10.305649118067803, 10)
loss 924.6644897460938
Current State,action,reward,Response time,Next State:  (10, 10.305649118067803) 3 19.0 924.314209939 (10.24826025489064, 11)
loss 387.5927734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 958.0744018554688
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 398.81207275390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 923.60888671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 388.41546630859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 384.8857116699219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 395.5281982421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 438.3713073730469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 396.492431640625
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 2 19.0 919.032945938 (10.546025383098053, 11)
loss 387.9617614746094
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 0 21.0 925.376677007 (10.655373370049301, 9)
loss 924.656494140625
Current State,action,reward,Response time,Next State:  (9, 10.655373370049301) 3 20.0 980.873751654 (10.624473674922116, 10)
loss 395.36669921875
Current State,action,reward,Response time,Next State:  (10, 10.624473674922116) 1 21.0 941.225969064 (10.771376986314287, 9)
loss 936.453857421875
Current State,action,reward,Response time,Next State:  (9, 10.771376986314287) 0 23.0 986.945968488 (10.924797168745895, 7)
loss 390.3678283691406
Current State,action,reward,Response time,Next State:  (7, 10.924797168745895) 3 22.0 1056.62679639 (11.039747673816453, 8)
loss 925.5612182617188
Current State,action,reward,Response time,Next State:  (8, 11.039747673816453) 3 21.0 1016.25926965 (11.271571944085663, 9)
loss 908.007080078125
Current State,action,reward,Response time,Next State:  (9, 11.271571944085663) 3 20.0 1013.12870607 (11.670334358779868, 10)
loss 396.6937561035156
Current State,action,reward,Response time,Next State:  (10, 11.670334358779868) 3 19.0 996.702699398 (11.819721938468785, 11)
loss 918.8471069335938
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 1488.31005859375
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 1 20.0 1012.73322757 (12.501496275411796, 10)
loss 919.0465698242188
Current State,action,reward,Response time,Next State:  (10, 12.501496275411796) 2 20.0 1040.79092857 (13.168618569876575, 10)
loss 938.5707397460938
Action +2 not possible so Scaled up by 1
Current State,action,reward,Response time,Next State:  (10, 13.168618569876575) 4 19.0 1076.17782493 (13.649658108197247, 11)
loss 406.3393249511719
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 0 21.0 1089.37925646 (14.283719188889453, 9)
loss 915.5521850585938
Current State,action,reward,Response time,Next State:  (9, 14.283719188889453) 3 20.0 1170.79974938 (14.677479537099185, 10)
loss 408.5938415527344
Action +2 not possible so Scaled up by 1
Current State,action,reward,Response time,Next State:  (10, 14.677479537099185) 4 19.0 1156.21398489 (15.353965082180355, 11)
loss 1499.508544921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 402.8212890625
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 2 19.0 1204.9600972 (16.466876895473597, 11)
loss 926.5543823242188
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 392.87030029296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 948.1851196289062
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 395.229736328125
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 4 19.0 1301.78496219 (17.944480812078613, 11)
loss 952.56640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 932.2468872070312
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 4 19.0 1339.64749699 (18.671267839956315, 11)
loss 914.6394653320312
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 1444.7266845703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 398.8103332519531
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 1 20.0 1387.23260634 (19.340464848017284, 10)
loss 388.5884704589844
Current State,action,reward,Response time,Next State:  (10, 19.340464848017284) 3 19.0 1403.55780672 (19.213467265587269, 11)
loss 962.6327514648438
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 2 19.0 1383.38281107 (19.140765783401285, 11)
loss 400.31640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 970.8751220703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 915.249267578125
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 1 20.0 1383.93777195 (19.25591252280865, 10)
loss 921.3921508789062
Current State,action,reward,Response time,Next State:  (10, 19.25591252280865) 2 20.0 1399.0728054 (19.08360399753829, 10)
loss 397.4835205078125
Current State,action,reward,Response time,Next State:  (10, 19.08360399753829) 3 19.0 1389.93285614 (18.668181536495972, 11)
loss 396.7244567871094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 404.0075378417969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 946.1699829101562
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 4 19.0 1310.13203606 (17.229782241685768, 11)
loss 914.60400390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 948.1666870117188
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 936.7164916992188
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 929.940185546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 396.0294494628906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 444.22674560546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 939.574951171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 399.54376220703125
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 1 20.0 1200.39231205 (15.817158911312735, 10)
loss 920.9052734375
Current State,action,reward,Response time,Next State:  (10, 15.817158911312735) 3 19.0 1216.66724247 (15.829956988360925, 11)
loss 396.7243957519531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 1467.402587890625
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 4 19.0 1207.88915169 (15.954793861767499, 11)
loss 942.1124267578125
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 1 20.0 1211.18755114 (16.004586266677634, 10)
loss 384.8598937988281
Current State,action,reward,Response time,Next State:  (10, 16.004586266677634) 2 20.0 1226.60915635 (16.017694914042416, 10)
loss 974.4607543945312
Current State,action,reward,Response time,Next State:  (10, 16.017694914042416) 3 19.0 1227.30449265 (15.947547279389703, 11)
loss 916.2086181640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 1455.860595703125
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 2 19.0 1219.63501821 (16.147078378791146, 11)
loss 912.207763671875
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 4 19.0 1221.34827555 (16.229253414601111, 11)
loss 399.9958801269531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 1499.916259765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 387.2743225097656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 394.4471435546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 409.6631164550781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 969.8515625
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 1 20.0 1269.21706044 (17.215992726625572, 10)
loss 401.4427185058594
############ Running episode number: 72  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 1 21.0 1029.06659875 (11.973514343585284, 9)
loss 936.5784301757812
Current State,action,reward,Response time,Next State:  (9, 11.973514343585284) 2 21.0 1049.87192659 (11.786394321941378, 9)
loss 394.4930725097656
Current State,action,reward,Response time,Next State:  (9, 11.786394321941378) 3 20.0 1040.0771169 (11.61852219546234, 10)
loss 917.0654296875
Current State,action,reward,Response time,Next State:  (10, 11.61852219546234) 3 19.0 993.95437024 (11.469111876584304, 11)
loss 393.4348449707031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 393.7369689941406
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 4 19.0 967.160346038 (11.25610796929319, 11)
loss 397.0348205566406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 1452.2528076171875
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 1 20.0 950.798097136 (10.995673623987257, 10)
loss 388.50860595703125
Current State,action,reward,Response time,Next State:  (10, 10.995673623987257) 0 22.0 960.915933313 (10.931193889570471, 8)
loss 438.516845703125
Current State,action,reward,Response time,Next State:  (8, 10.931193889570471) 1 23.0 1009.91486598 (10.816918347608043, 7)
loss 911.9414672851562
Current State,action,reward,Response time,Next State:  (7, 10.816918347608043) 3 22.0 1049.90414092 (10.819208572963639, 8)
loss 390.4549255371094
Current State,action,reward,Response time,Next State:  (8, 10.819208572963639) 0 24.0 1003.36990711 (10.768325938188134, 6)
loss 919.0648193359375
Current State,action,reward,Response time,Next State:  (6, 10.768325938188134) 3 23.0 1109.60632067 (10.772009508959538, 7)
loss 405.44378662109375
Current State,action,reward,Response time,Next State:  (7, 10.772009508959538) 2 23.0 1047.1055689 (10.644925616761762, 7)
loss 1455.5155029296875
Current State,action,reward,Response time,Next State:  (7, 10.644925616761762) 3 22.0 1039.18611617 (10.58735855349979, 8)
loss 946.3093872070312
Current State,action,reward,Response time,Next State:  (8, 10.58735855349979) 0 24.0 989.819480251 (10.552868829802469, 6)
loss 924.4066162109375
Current State,action,reward,Response time,Next State:  (6, 10.552868829802469) 4 22.0 1095.35618014 (10.553846649940214, 8)
loss 943.3497924804688
Current State,action,reward,Response time,Next State:  (8, 10.553846649940214) 1 23.0 987.860883917 (10.489125480251131, 7)
loss 405.90496826171875
Current State,action,reward,Response time,Next State:  (7, 10.489125480251131) 3 22.0 1029.47716098 (10.448897752470936, 8)
loss 404.7690734863281
Current State,action,reward,Response time,Next State:  (8, 10.448897752470936) 0 24.0 981.727167119 (10.433149880183072, 6)
loss 935.7465209960938
Current State,action,reward,Response time,Next State:  (6, 10.433149880183072) 3 23.0 1087.43807584 (10.44185150623065, 7)
loss 397.3225402832031
Current State,action,reward,Response time,Next State:  (7, 10.44185150623065) 3 22.0 1026.53120146 (10.370942817486826, 8)
loss 928.5140380859375
Current State,action,reward,Response time,Next State:  (8, 10.370942817486826) 3 21.0 977.171106925 (10.42733414151318, 9)
loss 381.00396728515625
Current State,action,reward,Response time,Next State:  (9, 10.42733414151318) 3 20.0 968.937023414 (10.388469398680568, 10)
loss 942.230712890625
Action +2 not possible so Scaled up by 1
Current State,action,reward,Response time,Next State:  (10, 10.388469398680568) 4 19.0 928.707336523 (10.344006106602812, 11)
loss 392.9491882324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 404.0999755859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 391.135498046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 399.2257995605469
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 2 19.0 911.223233653 (10.268274366284802, 11)
loss 396.55303955078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 965.4616088867188
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 909.8302001953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 402.8459777832031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 436.1648864746094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 404.11822509765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 377.7079162597656
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 2 19.0 909.008683798 (10.369891240151098, 11)
loss 396.0874328613281
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 1 20.0 916.069372847 (10.316955310454549, 10)
loss 426.6150817871094
Action +2 not possible so Scaled up by 1
Current State,action,reward,Response time,Next State:  (10, 10.316955310454549) 4 19.0 924.913936648 (10.333617326102203, 11)
loss 386.3190002441406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 905.648193359375
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 4 19.0 917.140709305 (10.425974763084863, 11)
loss 941.4051513671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 934.7345581054688
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 1483.2398681640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 927.5360717773438
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 388.1100158691406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 389.98370361328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 956.3252563476562
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 1467.0579833984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 970.1907958984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 388.82177734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 914.3515014648438
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 394.140869140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 389.53802490234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 391.2896728515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 384.9795227050781
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 1 20.0 1122.88439768 (14.677479537099185, 10)
loss 428.9102783203125
Current State,action,reward,Response time,Next State:  (10, 14.677479537099185) 3 19.0 1156.21398489 (15.353965082180355, 11)
loss 1527.4818115234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 919.5673828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 397.22125244140625
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 1 20.0 1238.24711194 (16.871606159345866, 10)
loss 386.0648498535156
Action +2 not possible so Scaled up by 1
Current State,action,reward,Response time,Next State:  (10, 16.871606159345866) 4 19.0 1272.5994393 (17.534967586021782, 11)
loss 389.78082275390625
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 4 19.0 1294.6873044 (17.669285735563751, 11)
loss 391.7702941894531
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 1 20.0 1301.78496219 (17.944480812078613, 10)
loss 915.5684204101562
Current State,action,reward,Response time,Next State:  (10, 17.944480812078613) 0 22.0 1329.50910109 (18.385807405229915, 8)
loss 908.7527465820312
Current State,action,reward,Response time,Next State:  (8, 18.385807405229915) 4 20.0 1445.59822471 (18.671267839956315, 10)
loss 928.6038818359375
Current State,action,reward,Response time,Next State:  (10, 18.671267839956315) 1 21.0 1368.06085906 (19.02839494033929, 9)
loss 930.1144409179688
Current State,action,reward,Response time,Next State:  (9, 19.02839494033929) 3 20.0 1419.16011 (19.286321916040979, 10)
loss 388.53436279296875
Current State,action,reward,Response time,Next State:  (10, 19.286321916040979) 3 19.0 1400.68584406 (19.340464848017284, 11)
loss 393.0911560058594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 389.8302917480469
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 4 19.0 1383.38281107 (19.140765783401285, 11)
loss 427.32464599609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 389.2400817871094
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 2 19.0 1392.48057747 (19.223969507401588, 11)
loss 958.0557250976562
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 934.2894287109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 389.5818176269531
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 2 19.0 1376.52055872 (18.668181536495972, 11)
loss 387.4599609375
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 4 19.0 1354.56874896 (18.375894992990247, 11)
loss 981.1654663085938
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 391.7173767089844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 399.6331787109375
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 4 19.0 1278.56065924 (16.84211602880065, 11)
loss 918.3514404296875
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 1 20.0 1258.07554888 (16.237094554670044, 10)
loss 401.416015625
Current State,action,reward,Response time,Next State:  (10, 16.237094554670044) 3 19.0 1238.94234737 (15.950694610794756, 11)
loss 421.9364318847656
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 2 19.0 1210.97093797 (15.828704162850809, 11)
loss 429.6464538574219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 934.4120483398438
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 0 21.0 1189.84140354 (15.446694946204717, 9)
loss 1431.533447265625
Current State,action,reward,Response time,Next State:  (9, 15.446694946204717) 1 22.0 1231.67579099 (15.750501603468638, 8)
loss 403.0590515136719
Current State,action,reward,Response time,Next State:  (8, 15.750501603468638) 3 21.0 1291.57831736 (15.817158911312735, 9)
loss 906.355712890625
Current State,action,reward,Response time,Next State:  (9, 15.817158911312735) 3 20.0 1251.06775133 (15.829956988360925, 10)
loss 932.15869140625
Current State,action,reward,Response time,Next State:  (10, 15.829956988360925) 2 20.0 1217.34610485 (15.892373986997768, 10)
loss 903.1884765625
Current State,action,reward,Response time,Next State:  (10, 15.892373986997768) 3 19.0 1220.65695786 (15.954793861767499, 11)
loss 915.656982421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 1471.9710693359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 905.923583984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 390.6767578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 945.2522583007812
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 391.17913818359375
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 4 19.0 1221.34827555 (16.229253414601111, 11)
loss 938.5103759765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 924.9955444335938
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 386.2779235839844
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 4 19.0 1248.87152463 (16.836383524612351, 11)
loss 396.5047607421875
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 1 20.0 1257.77263112 (16.845818065953559, 10)
loss 390.4364929199219
Current State,action,reward,Response time,Next State:  (10, 16.845818065953559) 3 19.0 1271.23153331 (17.052961248403161, 11)
loss 386.5287170410156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 397.4557189941406
############ Running episode number: 73  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 932.476806640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 1452.5411376953125
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 0 21.0 990.920419923 (11.61852219546234, 9)
loss 394.9030456542969
Current State,action,reward,Response time,Next State:  (9, 11.61852219546234) 1 22.0 1031.28983953 (11.469111876584304, 8)
loss 908.5997924804688
Current State,action,reward,Response time,Next State:  (8, 11.469111876584304) 3 21.0 1041.35337246 (11.336751742492702, 9)
loss 926.9207763671875
Current State,action,reward,Response time,Next State:  (9, 11.336751742492702) 3 20.0 1016.54054685 (11.25610796929319, 10)
loss 392.5543212890625
Current State,action,reward,Response time,Next State:  (10, 11.25610796929319) 3 19.0 974.730436685 (11.027107764209074, 11)
loss 391.16033935546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 428.7853088378906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 385.2695617675781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 394.3651123046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 2033.352294921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 389.27960205078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 394.9032897949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 398.33380126953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 930.2131958007812
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 1992.815673828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 392.5455017089844
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 0 21.0 925.789969445 (10.489125480251131, 9)
loss 936.8944091796875
Current State,action,reward,Response time,Next State:  (9, 10.489125480251131) 1 22.0 972.171495057 (10.448897752470936, 8)
loss 399.59130859375
Current State,action,reward,Response time,Next State:  (8, 10.448897752470936) 1 23.0 981.727167119 (10.433149880183072, 7)
loss 387.2476806640625
Current State,action,reward,Response time,Next State:  (7, 10.433149880183072) 4 21.0 1025.98894457 (10.44185150623065, 9)
loss 385.0854797363281
Current State,action,reward,Response time,Next State:  (9, 10.44185150623065) 3 20.0 969.696935814 (10.370942817486826, 10)
loss 397.13775634765625
Current State,action,reward,Response time,Next State:  (10, 10.370942817486826) 0 22.0 927.777654938 (10.42733414151318, 8)
loss 909.9932250976562
Current State,action,reward,Response time,Next State:  (8, 10.42733414151318) 4 20.0 980.466886297 (10.388469398680568, 10)
loss 914.6895141601562
Current State,action,reward,Response time,Next State:  (10, 10.388469398680568) 0 22.0 928.707336523 (10.344006106602812, 8)
loss 924.5615234375
Current State,action,reward,Response time,Next State:  (8, 10.344006106602812) 3 21.0 975.596796379 (10.319026962956018, 9)
loss 940.6151123046875
Current State,action,reward,Response time,Next State:  (9, 10.319026962956018) 3 20.0 963.267677113 (10.30224719189987, 10)
loss 911.87744140625
Current State,action,reward,Response time,Next State:  (10, 10.30224719189987) 3 19.0 924.133757854 (10.278181486298042, 11)
loss 934.7941284179688
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 443.0506896972656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 911.6631469726562
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 1 20.0 914.247384359 (10.305649118067803, 10)
loss 390.53997802734375
Current State,action,reward,Response time,Next State:  (10, 10.305649118067803) 3 19.0 924.314209939 (10.24826025489064, 11)
loss 1466.4200439453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 388.3390808105469
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 1 20.0 911.133954163 (10.236991269871366, 10)
loss 431.7654724121094
Action +2 not possible so Scaled up by 1
Current State,action,reward,Response time,Next State:  (10, 10.236991269871366) 4 19.0 920.672316722 (10.236272697871373, 11)
loss 945.9088134765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 390.3374328613281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 384.2836608886719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 923.8240356445312
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 0 21.0 914.152581784 (10.390165524255663, 9)
loss 1994.760009765625
Current State,action,reward,Response time,Next State:  (9, 10.390165524255663) 3 20.0 966.991429728 (10.425974763084863, 10)
loss 389.4582824707031
Current State,action,reward,Response time,Next State:  (10, 10.425974763084863) 1 21.0 930.696774523 (10.546025383098053, 9)
loss 1459.8880615234375
Current State,action,reward,Response time,Next State:  (9, 10.546025383098053) 3 20.0 975.14992417 (10.655373370049301, 10)
loss 399.3213806152344
Current State,action,reward,Response time,Next State:  (10, 10.655373370049301) 0 22.0 942.865015335 (10.624473674922116, 8)
loss 390.34063720703125
Current State,action,reward,Response time,Next State:  (8, 10.624473674922116) 3 21.0 991.988665914 (10.771376986314287, 9)
loss 391.2138671875
Current State,action,reward,Response time,Next State:  (9, 10.771376986314287) 3 20.0 986.945968488 (10.924797168745895, 10)
loss 386.361328125
Current State,action,reward,Response time,Next State:  (10, 10.924797168745895) 3 19.0 957.1563561 (11.039747673816453, 11)
loss 1449.45361328125
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 0 21.0 951.466016946 (11.271571944085663, 9)
loss 385.6101989746094
Current State,action,reward,Response time,Next State:  (9, 11.271571944085663) 3 20.0 1013.12870607 (11.670334358779868, 10)
loss 935.7754516601562
Current State,action,reward,Response time,Next State:  (10, 11.670334358779868) 3 19.0 996.702699398 (11.819721938468785, 11)
loss 392.25665283203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 383.4120178222656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 919.56103515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 901.40966796875
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 1 20.0 1063.96010023 (13.649658108197247, 10)
loss 394.63201904296875
Current State,action,reward,Response time,Next State:  (10, 13.649658108197247) 3 19.0 1101.69413046 (14.283719188889453, 11)
loss 393.15386962890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 389.228271484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 397.12261962890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 405.1417541503906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 381.0716552734375
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 2 19.0 1238.24711194 (16.871606159345866, 11)
loss 944.2948608398438
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 389.8730163574219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 914.5975341796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 391.62060546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 397.7743225097656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 1436.7509765625
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 1 20.0 1354.73183582 (19.02839494033929, 10)
loss 924.8045043945312
Current State,action,reward,Response time,Next State:  (10, 19.02839494033929) 3 19.0 1387.00434183 (19.286321916040979, 11)
loss 383.84796142578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 925.1224975585938
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 1498.1766357421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 1475.8887939453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 394.31024169921875
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 0 21.0 1392.48057747 (19.223969507401588, 9)
loss 397.57525634765625
Current State,action,reward,Response time,Next State:  (9, 19.223969507401588) 3 20.0 1429.39747342 (19.25591252280865, 10)
loss 388.9220886230469
Current State,action,reward,Response time,Next State:  (10, 19.25591252280865) 2 20.0 1399.0728054 (19.08360399753829, 10)
loss 384.9442443847656
Current State,action,reward,Response time,Next State:  (10, 19.08360399753829) 3 19.0 1389.93285614 (18.668181536495972, 11)
loss 387.87274169921875
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 1 20.0 1354.56874896 (18.375894992990247, 10)
loss 384.212158203125
Current State,action,reward,Response time,Next State:  (10, 18.375894992990247) 3 19.0 1352.39307459 (17.82724819986867, 11)
loss 384.0307312011719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 919.7619018554688
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 928.9920654296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 915.2625732421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 1501.845947265625
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 0 21.0 1210.97093797 (15.828704162850809, 9)
loss 951.21826171875
Current State,action,reward,Response time,Next State:  (9, 15.828704162850809) 3 20.0 1251.67208827 (15.550833128512703, 10)
loss 914.1734619140625
Action +2 not possible so Scaled up by 1
Current State,action,reward,Response time,Next State:  (10, 15.550833128512703) 4 19.0 1202.54023315 (15.446694946204717, 11)
loss 395.01544189453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 939.4222412109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 395.92266845703125
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 4 19.0 1203.91462651 (15.829956988360925, 11)
loss 393.4125061035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 389.3821716308594
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 1 20.0 1207.88915169 (15.954793861767499, 10)
loss 396.48785400390625
Current State,action,reward,Response time,Next State:  (10, 15.954793861767499) 3 19.0 1223.96796344 (16.004586266677634, 11)
loss 389.6741943359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 385.7882995605469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 382.690185546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 928.670166015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 944.6889038085938
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 392.0761413574219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 378.1863098144531
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 1 20.0 1229.17115431 (16.667936385136993, 10)
loss 389.63330078125
Current State,action,reward,Response time,Next State:  (10, 16.667936385136993) 3 19.0 1261.79596106 (16.836383524612351, 11)
loss 388.59619140625
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 2 19.0 1257.77263112 (16.845818065953559, 11)
loss 381.115966796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 383.44805908203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 381.55255126953125
############ Running episode number: 74  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 915.6356201171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 432.92791748046875
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 4 19.0 990.920419923 (11.61852219546234, 11)
loss 919.270751953125
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 4 19.0 982.049698353 (11.469111876584304, 11)
loss 914.2212524414062
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 4 19.0 974.154538113 (11.336751742492702, 11)
loss 2062.5712890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 422.7858581542969
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 1 20.0 962.898956888 (11.027107764209074, 10)
loss 384.7983093261719
Current State,action,reward,Response time,Next State:  (10, 11.027107764209074) 0 22.0 962.583328739 (10.995673623987257, 8)
loss 945.3997802734375
Current State,action,reward,Response time,Next State:  (8, 10.995673623987257) 3 21.0 1013.68337084 (10.931193889570471, 9)
loss 1448.589599609375
Current State,action,reward,Response time,Next State:  (9, 10.931193889570471) 1 22.0 995.311594677 (10.816918347608043, 8)
loss 401.1497497558594
Current State,action,reward,Response time,Next State:  (8, 10.816918347608043) 3 21.0 1003.23605536 (10.819208572963639, 9)
loss 382.1741638183594
Current State,action,reward,Response time,Next State:  (9, 10.819208572963639) 3 20.0 989.449716 (10.768325938188134, 10)
loss 389.1583557128906
Current State,action,reward,Response time,Next State:  (10, 10.768325938188134) 0 22.0 948.856481751 (10.772009508959538, 8)
loss 925.3416748046875
Current State,action,reward,Response time,Next State:  (8, 10.772009508959538) 3 21.0 1000.61136749 (10.644925616761762, 9)
loss 899.9007568359375
Current State,action,reward,Response time,Next State:  (9, 10.644925616761762) 3 20.0 980.32686333 (10.58735855349979, 10)
loss 386.33087158203125
Current State,action,reward,Response time,Next State:  (10, 10.58735855349979) 3 19.0 939.257231149 (10.552868829802469, 11)
loss 386.76226806640625
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 0 21.0 925.738299342 (10.553846649940214, 9)
loss 980.271240234375
Current State,action,reward,Response time,Next State:  (9, 10.553846649940214) 1 22.0 975.559328891 (10.489125480251131, 8)
loss 965.7503662109375
Current State,action,reward,Response time,Next State:  (8, 10.489125480251131) 3 21.0 984.078268423 (10.448897752470936, 9)
loss 392.2933654785156
Current State,action,reward,Response time,Next State:  (9, 10.448897752470936) 3 20.0 970.065772031 (10.433149880183072, 10)
loss 1452.6124267578125
Current State,action,reward,Response time,Next State:  (10, 10.433149880183072) 3 19.0 931.077372094 (10.44185150623065, 11)
loss 902.2681274414062
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 392.0898132324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 916.5048828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 390.48828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 927.0397338867188
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 1485.4053955078125
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 2 19.0 913.381595845 (10.30224719189987, 11)
loss 396.741943359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 389.36822509765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 1472.9422607421875
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 1 20.0 910.69972028 (10.335411397720526, 10)
loss 389.1181640625
Current State,action,reward,Response time,Next State:  (10, 10.335411397720526) 3 19.0 925.892923039 (10.305649118067803, 11)
loss 910.8919067382812
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 2 19.0 912.67468196 (10.24826025489064, 11)
loss 382.21575927734375
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 2 19.0 909.642131904 (10.276491935146446, 11)
loss 435.8134765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 935.7327880859375
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 2 19.0 909.046654676 (10.236272697871373, 11)
loss 959.87646484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 420.1003723144531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 949.963134765625
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 1 20.0 913.272125304 (10.333617326102203, 10)
loss 908.903076171875
Current State,action,reward,Response time,Next State:  (10, 10.333617326102203) 3 19.0 925.797758139 (10.390165524255663, 11)
loss 429.3352355957031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 898.6968994140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 390.56024169921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 433.4133605957031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 2007.7803955078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 922.0552978515625
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 0 21.0 937.284736847 (10.924797168745895, 9)
loss 392.2783508300781
Current State,action,reward,Response time,Next State:  (9, 10.924797168745895) 3 20.0 994.97675791 (11.039747673816453, 10)
loss 387.63397216796875
Current State,action,reward,Response time,Next State:  (10, 11.039747673816453) 0 22.0 963.253801267 (11.271571944085663, 8)
loss 391.0235595703125
Current State,action,reward,Response time,Next State:  (8, 11.271571944085663) 2 22.0 1029.8081916 (11.670334358779868, 8)
loss 440.4970397949219
Current State,action,reward,Response time,Next State:  (8, 11.670334358779868) 3 21.0 1053.11377918 (11.819721938468785, 9)
loss 1442.884521484375
Current State,action,reward,Response time,Next State:  (9, 11.819721938468785) 3 20.0 1041.82165315 (12.19918626616789, 10)
loss 904.798583984375
Current State,action,reward,Response time,Next State:  (10, 12.19918626616789) 2 20.0 1024.75516863 (12.501496275411796, 10)
loss 384.12005615234375
Current State,action,reward,Response time,Next State:  (10, 12.501496275411796) 1 21.0 1040.79092857 (13.168618569876575, 9)
loss 398.9847717285156
Current State,action,reward,Response time,Next State:  (9, 13.168618569876575) 3 20.0 1112.429735 (13.649658108197247, 10)
loss 907.12939453125
Current State,action,reward,Response time,Next State:  (10, 13.649658108197247) 1 21.0 1101.69413046 (14.283719188889453, 9)
loss 978.0107421875
Current State,action,reward,Response time,Next State:  (9, 14.283719188889453) 0 23.0 1170.79974938 (14.677479537099185, 7)
loss 916.5655517578125
Current State,action,reward,Response time,Next State:  (7, 14.677479537099185) 3 22.0 1290.48169407 (15.353965082180355, 8)
loss 435.1680603027344
Current State,action,reward,Response time,Next State:  (8, 15.353965082180355) 3 21.0 1268.40282167 (15.836943704090487, 9)
loss 389.4585266113281
Current State,action,reward,Response time,Next State:  (9, 15.836943704090487) 3 20.0 1252.10338759 (16.466876895473597, 10)
loss 392.57659912109375
Action +2 not possible so Scaled up by 1
Current State,action,reward,Response time,Next State:  (10, 16.466876895473597) 4 19.0 1251.130943 (16.871606159345866, 11)
loss 934.7470092773438
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 925.3336181640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 384.0500793457031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 383.9800720214844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 386.5193786621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 919.5498657226562
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 389.78857421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 387.3353271484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 378.4310302734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 923.6558227539062
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 0 21.0 1383.38281107 (19.140765783401285, 9)
loss 394.27581787109375
Current State,action,reward,Response time,Next State:  (9, 19.140765783401285) 2 21.0 1425.04216908 (19.385636054792762, 9)
loss 388.1861877441406
Current State,action,reward,Response time,Next State:  (9, 19.385636054792762) 0 23.0 1437.85991935 (19.223969507401588, 7)
loss 919.2994995117188
Current State,action,reward,Response time,Next State:  (7, 19.223969507401588) 3 22.0 1573.80408654 (19.25591252280865, 8)
loss 390.4217529296875
Current State,action,reward,Response time,Next State:  (8, 19.25591252280865) 2 22.0 1496.45133993 (19.08360399753829, 8)
loss 387.62164306640625
Current State,action,reward,Response time,Next State:  (8, 19.08360399753829) 2 22.0 1486.3808035 (18.668181536495972, 8)
loss 919.6404418945312
Current State,action,reward,Response time,Next State:  (8, 18.668181536495972) 3 21.0 1462.10152292 (18.375894992990247, 9)
loss 957.175537109375
Current State,action,reward,Response time,Next State:  (9, 18.375894992990247) 4 19.0 1385.00495784 (17.82724819986867, 11)
loss 390.3320617675781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 396.1414489746094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 965.2138671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 932.8645629882812
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 927.4818725585938
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 396.24151611328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 977.1298217773438
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 2 19.0 1189.84140354 (15.446694946204717, 11)
loss 392.6662292480469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 394.06732177734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 929.7532958984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 928.8533935546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 400.1580810546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 388.3182678222656
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 0 21.0 1211.18755114 (16.004586266677634, 9)
loss 916.9136962890625
Current State,action,reward,Response time,Next State:  (9, 16.004586266677634) 3 20.0 1260.87864843 (16.017694914042416, 10)
loss 1478.12158203125
Current State,action,reward,Response time,Next State:  (10, 16.017694914042416) 1 21.0 1227.30449265 (15.947547279389703, 9)
loss 385.3326721191406
Current State,action,reward,Response time,Next State:  (9, 15.947547279389703) 0 23.0 1257.89293893 (16.11465619633363, 7)
loss 912.670654296875
Current State,action,reward,Response time,Next State:  (7, 16.11465619633363) 3 22.0 1380.04184534 (16.147078378791146, 8)
loss 427.5713806152344
Current State,action,reward,Response time,Next State:  (8, 16.147078378791146) 3 21.0 1314.7561657 (16.229253414601111, 9)
loss 384.6103210449219
Current State,action,reward,Response time,Next State:  (9, 16.229253414601111) 4 19.0 1272.63886489 (16.295120821876548, 11)
loss 427.96063232421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 389.32757568359375
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 1 20.0 1248.87152463 (16.836383524612351, 10)
loss 427.63494873046875
Current State,action,reward,Response time,Next State:  (10, 16.836383524612351) 3 19.0 1270.73108663 (16.845818065953559, 11)
loss 386.8132019042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 953.0824584960938
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 954.1353759765625
############ Running episode number: 75  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 911.3654174804688
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 388.8514404296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 388.5328674316406
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 2 19.0 982.049698353 (11.469111876584304, 11)
loss 983.05078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 387.4970397949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 386.2340087890625
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 4 19.0 962.898956888 (11.027107764209074, 11)
loss 387.7889709472656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 430.1818542480469
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 1 20.0 949.137050055 (10.931193889570471, 10)
loss 385.1384582519531
Current State,action,reward,Response time,Next State:  (10, 10.931193889570471) 2 20.0 957.495664348 (10.816918347608043, 10)
loss 387.70794677734375
Current State,action,reward,Response time,Next State:  (10, 10.816918347608043) 3 19.0 951.434021987 (10.819208572963639, 11)
loss 1435.16455078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 896.4991455078125
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 4 19.0 937.12351295 (10.772009508959538, 11)
loss 927.6290283203125
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 4 19.0 937.318160694 (10.644925616761762, 11)
loss 1486.7291259765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 381.8434753417969
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 0 21.0 927.560809977 (10.552868829802469, 9)
loss 1987.1629638671875
Current State,action,reward,Response time,Next State:  (9, 10.552868829802469) 0 23.0 975.508144832 (10.553846649940214, 7)
loss 387.9801025390625
Current State,action,reward,Response time,Next State:  (7, 10.553846649940214) 3 22.0 1033.5103727 (10.489125480251131, 8)
loss 395.78497314453125
Current State,action,reward,Response time,Next State:  (8, 10.489125480251131) 3 21.0 984.078268423 (10.448897752470936, 9)
loss 388.5024719238281
Current State,action,reward,Response time,Next State:  (9, 10.448897752470936) 3 20.0 970.065772031 (10.433149880183072, 10)
loss 389.3599548339844
Current State,action,reward,Response time,Next State:  (10, 10.433149880183072) 3 19.0 931.077372094 (10.44185150623065, 11)
loss 389.7349853515625
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 0 21.0 919.871906942 (10.370942817486826, 9)
loss 1474.576171875
Current State,action,reward,Response time,Next State:  (9, 10.370942817486826) 3 20.0 965.985215893 (10.42733414151318, 10)
loss 1475.8341064453125
Current State,action,reward,Response time,Next State:  (10, 10.42733414151318) 3 19.0 930.768881517 (10.388469398680568, 11)
loss 395.41082763671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 387.65069580078125
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 4 19.0 914.701547126 (10.319026962956018, 11)
loss 912.55224609375
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 2 19.0 913.381595845 (10.30224719189987, 11)
loss 389.36883544921875
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 2 19.0 912.494916918 (10.278181486298042, 11)
loss 386.29833984375
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 1 20.0 911.223233653 (10.268274366284802, 10)
loss 393.21484375
Current State,action,reward,Response time,Next State:  (10, 10.268274366284802) 3 19.0 922.331700166 (10.335411397720526, 11)
loss 386.4862060546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 397.37249755859375
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 1 20.0 912.67468196 (10.24826025489064, 10)
loss 383.5388488769531
Current State,action,reward,Response time,Next State:  (10, 10.24826025489064) 3 19.0 921.2700698 (10.276491935146446, 11)
loss 970.6181030273438
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 912.1956787109375
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 0 21.0 909.046654676 (10.236272697871373, 9)
loss 914.4172973632812
Current State,action,reward,Response time,Next State:  (9, 10.236272697871373) 3 20.0 958.935899728 (10.369891240151098, 10)
loss 920.4712524414062
Current State,action,reward,Response time,Next State:  (10, 10.369891240151098) 3 19.0 927.721874973 (10.316955310454549, 11)
loss 381.7957458496094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 394.8065490722656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 393.8624267578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 1458.8623046875
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 0 21.0 919.032945938 (10.546025383098053, 9)
loss 390.1762390136719
Current State,action,reward,Response time,Next State:  (9, 10.546025383098053) 3 20.0 975.14992417 (10.655373370049301, 10)
loss 389.5777587890625
Current State,action,reward,Response time,Next State:  (10, 10.655373370049301) 3 19.0 942.865015335 (10.624473674922116, 11)
loss 384.8438720703125
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 0 21.0 929.522052234 (10.771376986314287, 9)
loss 387.6145324707031
Current State,action,reward,Response time,Next State:  (9, 10.771376986314287) 3 20.0 986.945968488 (10.924797168745895, 10)
loss 399.60040283203125
Current State,action,reward,Response time,Next State:  (10, 10.924797168745895) 2 20.0 957.1563561 (11.039747673816453, 10)
loss 385.90478515625
Current State,action,reward,Response time,Next State:  (10, 11.039747673816453) 3 19.0 963.253801267 (11.271571944085663, 11)
loss 430.32110595703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 387.433837890625
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 2 19.0 984.787563682 (11.819721938468785, 11)
loss 387.96246337890625
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 4 19.0 992.681522335 (12.19918626616789, 11)
loss 391.6273193359375
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 4 19.0 1012.73322757 (12.501496275411796, 11)
loss 938.203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 917.3423461914062
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 390.4764099121094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 1451.9912109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 384.7690734863281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 388.24493408203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 915.4363403320312
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 916.666259765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 385.08331298828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 383.0340270996094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 2012.0126953125
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 0 21.0 1301.78496219 (17.944480812078613, 9)
loss 954.4673461914062
Current State,action,reward,Response time,Next State:  (9, 17.944480812078613) 3 20.0 1362.4225545 (18.385807405229915, 10)
loss 2021.881103515625
Current State,action,reward,Response time,Next State:  (10, 18.385807405229915) 1 21.0 1352.9188695 (18.671267839956315, 9)
loss 935.614501953125
Current State,action,reward,Response time,Next State:  (9, 18.671267839956315) 3 20.0 1400.46626871 (19.02839494033929, 10)
loss 1422.7960205078125
Action +2 not possible so Scaled up by 1
Current State,action,reward,Response time,Next State:  (10, 19.02839494033929) 4 19.0 1387.00434183 (19.286321916040979, 11)
loss 389.37701416015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 1446.189208984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 397.12335205078125
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 4 19.0 1383.38281107 (19.140765783401285, 11)
loss 399.3621826171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 385.6905212402344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 382.3884582519531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 384.3555603027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 391.0969543457031
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 0 21.0 1376.52055872 (18.668181536495972, 9)
loss 1460.8961181640625
Current State,action,reward,Response time,Next State:  (9, 18.668181536495972) 3 20.0 1400.30471596 (18.375894992990247, 10)
loss 979.2006225585938
Current State,action,reward,Response time,Next State:  (10, 18.375894992990247) 3 19.0 1352.39307459 (17.82724819986867, 11)
loss 914.997314453125
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 2 19.0 1310.13203606 (17.229782241685768, 11)
loss 388.5191345214844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 929.413818359375
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 0 21.0 1258.07554888 (16.237094554670044, 9)
loss 380.9889831542969
Current State,action,reward,Response time,Next State:  (9, 16.237094554670044) 3 20.0 1273.04930988 (15.950694610794756, 10)
loss 385.6275939941406
Current State,action,reward,Response time,Next State:  (10, 15.950694610794756) 1 21.0 1223.7505224 (15.828704162850809, 9)
loss 912.998779296875
Current State,action,reward,Response time,Next State:  (9, 15.828704162850809) 1 22.0 1251.67208827 (15.550833128512703, 8)
loss 908.1387939453125
Current State,action,reward,Response time,Next State:  (8, 15.550833128512703) 2 22.0 1279.90873428 (15.446694946204717, 8)
loss 380.11370849609375
Current State,action,reward,Response time,Next State:  (8, 15.446694946204717) 3 21.0 1273.82239956 (15.750501603468638, 9)
loss 388.43341064453125
Current State,action,reward,Response time,Next State:  (9, 15.750501603468638) 3 20.0 1247.57857022 (15.817158911312735, 10)
loss 387.2085876464844
Current State,action,reward,Response time,Next State:  (10, 15.817158911312735) 3 19.0 1216.66724247 (15.829956988360925, 11)
loss 391.2911376953125
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 4 19.0 1204.59090422 (15.892373986997768, 11)
loss 389.056640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 934.1787719726562
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 4 19.0 1211.18755114 (16.004586266677634, 11)
loss 971.9769897460938
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 393.64776611328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 950.5245361328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 922.72802734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 938.76318359375
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 4 19.0 1221.34827555 (16.229253414601111, 11)
loss 391.0985107421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 1490.656982421875
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 0 21.0 1229.17115431 (16.667936385136993, 9)
loss 386.5028991699219
Current State,action,reward,Response time,Next State:  (9, 16.667936385136993) 4 19.0 1295.6017535 (16.836383524612351, 11)
loss 382.987060546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 389.6200866699219
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 0 21.0 1258.27117243 (17.052961248403161, 9)
loss 395.327392578125
Current State,action,reward,Response time,Next State:  (9, 17.052961248403161) 3 20.0 1315.75590499 (17.215992726625572, 10)
loss 980.7955932617188
############ Running episode number: 76  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 389.2232666015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 377.48297119140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 396.2217102050781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 2029.482666015625
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 0 21.0 974.154538113 (11.336751742492702, 9)
loss 394.6786804199219
Current State,action,reward,Response time,Next State:  (9, 11.336751742492702) 3 20.0 1016.54054685 (11.25610796929319, 10)
loss 387.3435974121094
Current State,action,reward,Response time,Next State:  (10, 11.25610796929319) 3 19.0 974.730436685 (11.027107764209074, 11)
loss 397.10711669921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 431.5791931152344
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 4 19.0 949.137050055 (10.931193889570471, 11)
loss 915.0338745117188
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 0 21.0 945.729803224 (10.816918347608043, 9)
loss 389.9926452636719
Current State,action,reward,Response time,Next State:  (9, 10.816918347608043) 1 22.0 989.329834005 (10.819208572963639, 8)
loss 920.5982055664062
Current State,action,reward,Response time,Next State:  (8, 10.819208572963639) 0 24.0 1003.36990711 (10.768325938188134, 6)
loss 387.4909973144531
Current State,action,reward,Response time,Next State:  (6, 10.768325938188134) 1 25.0 1109.60632067 (10.772009508959538, 5)
loss 1446.329833984375
Current State,action,reward,Response time,Next State:  (5, 10.772009508959538) 2 25.0 1247.32095832 (10.644925616761762, 5)
loss 390.3926086425781
Current State,action,reward,Response time,Next State:  (5, 10.644925616761762) 4 23.0 1235.64420805 (10.58735855349979, 7)
loss 400.50750732421875
Current State,action,reward,Response time,Next State:  (7, 10.58735855349979) 4 21.0 1035.59872498 (10.552868829802469, 9)
loss 921.6592407226562
Current State,action,reward,Response time,Next State:  (9, 10.552868829802469) 3 20.0 975.508144832 (10.553846649940214, 10)
loss 920.4459838867188
Current State,action,reward,Response time,Next State:  (10, 10.553846649940214) 3 19.0 937.479622653 (10.489125480251131, 11)
loss 389.5729675292969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 914.9512939453125
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 0 21.0 920.244245637 (10.433149880183072, 9)
loss 380.84417724609375
Current State,action,reward,Response time,Next State:  (9, 10.433149880183072) 1 22.0 969.241448633 (10.44185150623065, 8)
loss 939.1709594726562
Current State,action,reward,Response time,Next State:  (8, 10.44185150623065) 0 24.0 981.315350702 (10.370942817486826, 6)
loss 384.82244873046875
Current State,action,reward,Response time,Next State:  (6, 10.370942817486826) 4 22.0 1083.32375633 (10.42733414151318, 8)
loss 924.0322265625
Current State,action,reward,Response time,Next State:  (8, 10.42733414151318) 3 21.0 980.466886297 (10.388469398680568, 9)
loss 379.2051086425781
Current State,action,reward,Response time,Next State:  (9, 10.388469398680568) 4 19.0 966.902645924 (10.344006106602812, 11)
loss 922.1502685546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 388.6427917480469
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 2 19.0 913.381595845 (10.30224719189987, 11)
loss 389.02294921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 388.0146484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 917.6084594726562
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 397.8417053222656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 940.05517578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 933.6301879882812
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 387.39031982421875
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 0 21.0 911.133954163 (10.236991269871366, 9)
loss 389.4488220214844
Current State,action,reward,Response time,Next State:  (9, 10.236991269871366) 3 20.0 958.973513426 (10.236272697871373, 10)
loss 431.87017822265625
Current State,action,reward,Response time,Next State:  (10, 10.236272697871373) 3 19.0 920.634200723 (10.369891240151098, 11)
loss 398.1564025878906
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 1 20.0 916.069372847 (10.316955310454549, 10)
loss 904.4413452148438
Current State,action,reward,Response time,Next State:  (10, 10.316955310454549) 3 19.0 924.913936648 (10.333617326102203, 11)
loss 380.51251220703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 386.77178955078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 943.3595581054688
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 0 21.0 919.032945938 (10.546025383098053, 9)
loss 922.4259033203125
Current State,action,reward,Response time,Next State:  (9, 10.546025383098053) 3 20.0 975.14992417 (10.655373370049301, 10)
loss 387.0938720703125
Action +2 not possible so Scaled up by 1
Current State,action,reward,Response time,Next State:  (10, 10.655373370049301) 4 19.0 942.865015335 (10.624473674922116, 11)
loss 378.0166931152344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 390.5798034667969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 942.4028930664062
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 1462.78125
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 1 20.0 951.466016946 (11.271571944085663, 10)
loss 1442.516357421875
Current State,action,reward,Response time,Next State:  (10, 11.271571944085663) 3 19.0 975.550709187 (11.670334358779868, 11)
loss 429.4365539550781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 385.85455322265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 392.384521484375
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 4 19.0 1012.73322757 (12.501496275411796, 11)
loss 913.6785278320312
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 4 19.0 1028.70793389 (13.168618569876575, 11)
loss 420.1980285644531
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 0 21.0 1063.96010023 (13.649658108197247, 9)
loss 384.30694580078125
Current State,action,reward,Response time,Next State:  (9, 13.649658108197247) 3 20.0 1137.6097809 (14.283719188889453, 10)
loss 392.7033996582031
Current State,action,reward,Response time,Next State:  (10, 14.283719188889453) 3 19.0 1135.32732476 (14.677479537099185, 11)
loss 930.0113525390625
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 0 21.0 1143.69153516 (15.353965082180355, 9)
loss 374.6126403808594
Current State,action,reward,Response time,Next State:  (9, 15.353965082180355) 4 19.0 1226.82184023 (15.836943704090487, 11)
loss 386.8320617675781
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 1 20.0 1204.9600972 (16.466876895473597, 10)
loss 387.3613586425781
Current State,action,reward,Response time,Next State:  (10, 16.466876895473597) 3 19.0 1251.130943 (16.871606159345866, 11)
loss 930.9420166015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 921.9840087890625
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 1 20.0 1294.6873044 (17.669285735563751, 10)
loss 1467.676025390625
Current State,action,reward,Response time,Next State:  (10, 17.669285735563751) 3 19.0 1314.91162813 (17.944480812078613, 11)
loss 382.06268310546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 385.0743408203125
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 2 19.0 1339.64749699 (18.671267839956315, 11)
loss 950.1804809570312
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 386.0067138671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 392.47308349609375
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 2 19.0 1387.23260634 (19.340464848017284, 11)
loss 906.7402954101562
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 1 20.0 1390.09363446 (19.213467265587269, 10)
loss 925.5855102539062
Current State,action,reward,Response time,Next State:  (10, 19.213467265587269) 3 19.0 1396.82133527 (19.140765783401285, 11)
loss 390.2298889160156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 386.2259521484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 923.6542358398438
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 1 20.0 1383.93777195 (19.25591252280865, 10)
loss 935.9178466796875
Current State,action,reward,Response time,Next State:  (10, 19.25591252280865) 1 21.0 1399.0728054 (19.08360399753829, 9)
loss 380.2932434082031
Current State,action,reward,Response time,Next State:  (9, 19.08360399753829) 3 20.0 1422.05003169 (18.668181536495972, 10)
loss 379.4559326171875
Current State,action,reward,Response time,Next State:  (10, 18.668181536495972) 3 19.0 1367.89714889 (18.375894992990247, 11)
loss 914.866455078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 943.5914306640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 910.5736694335938
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 906.3136596679688
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 392.725830078125
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 2 19.0 1226.10492247 (15.950694610794756, 11)
loss 378.03033447265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 1483.3387451171875
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 2 19.0 1204.52470225 (15.550833128512703, 11)
loss 1455.561767578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 379.88092041015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 392.44696044921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 385.8232727050781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 930.1651000976562
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 384.9145812988281
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 2 19.0 1207.88915169 (15.954793861767499, 11)
loss 389.8424377441406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 386.10296630859375
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 1 20.0 1213.81868812 (16.017694914042416, 10)
loss 909.2130737304688
Current State,action,reward,Response time,Next State:  (10, 16.017694914042416) 3 19.0 1227.30449265 (15.947547279389703, 11)
loss 913.16064453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 381.96209716796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 932.38818359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 912.17822265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 388.8841247558594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 929.207275390625
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 0 21.0 1248.87152463 (16.836383524612351, 9)
loss 379.4782409667969
Current State,action,reward,Response time,Next State:  (9, 16.836383524612351) 4 19.0 1304.41912996 (16.845818065953559, 11)
loss 394.0882568359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 1528.8433837890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 907.6763305664062
############ Running episode number: 77  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 1 21.0 1029.06659875 (11.973514343585284, 9)
loss 386.7273254394531
Current State,action,reward,Response time,Next State:  (9, 11.973514343585284) 3 20.0 1049.87192659 (11.786394321941378, 10)
loss 388.3556823730469
Current State,action,reward,Response time,Next State:  (10, 11.786394321941378) 2 20.0 1002.85899476 (11.61852219546234, 10)
loss 389.38519287109375
Current State,action,reward,Response time,Next State:  (10, 11.61852219546234) 3 19.0 993.95437024 (11.469111876584304, 11)
loss 379.8985900878906
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 0 21.0 974.154538113 (11.336751742492702, 9)
loss 382.62835693359375
Current State,action,reward,Response time,Next State:  (9, 11.336751742492702) 4 19.0 1016.54054685 (11.25610796929319, 11)
loss 926.542724609375
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 0 21.0 962.898956888 (11.027107764209074, 9)
loss 376.78082275390625
Current State,action,reward,Response time,Next State:  (9, 11.027107764209074) 4 19.0 1000.33221268 (10.995673623987257, 11)
loss 1476.7989501953125
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 2 19.0 949.137050055 (10.931193889570471, 11)
loss 387.5586242675781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 1451.9654541015625
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 0 21.0 939.691239608 (10.819208572963639, 9)
loss 962.3584594726562
Current State,action,reward,Response time,Next State:  (9, 10.819208572963639) 3 20.0 989.449716 (10.768325938188134, 10)
loss 385.59234619140625
Current State,action,reward,Response time,Next State:  (10, 10.768325938188134) 3 19.0 948.856481751 (10.772009508959538, 11)
loss 901.92041015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 395.1020812988281
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 4 19.0 930.602776506 (10.58735855349979, 11)
loss 389.7316589355469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 387.4317626953125
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 4 19.0 925.738299342 (10.553846649940214, 11)
loss 939.9381103515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 900.3960571289062
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 949.6959228515625
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 0 21.0 920.244245637 (10.433149880183072, 9)
loss 421.6296081542969
Current State,action,reward,Response time,Next State:  (9, 10.433149880183072) 4 19.0 969.241448633 (10.44185150623065, 11)
loss 388.4150695800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 933.6588745117188
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 1461.5679931640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 1473.3599853515625
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 1 20.0 917.051082408 (10.344006106602812, 10)
loss 1981.40869140625
Current State,action,reward,Response time,Next State:  (10, 10.344006106602812) 3 19.0 926.348821567 (10.319026962956018, 11)
loss 388.4497375488281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 385.75439453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 386.3778381347656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 1426.4576416015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 929.7027587890625
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 0 21.0 914.247384359 (10.305649118067803, 9)
loss 386.1142272949219
Current State,action,reward,Response time,Next State:  (9, 10.305649118067803) 3 20.0 962.567412952 (10.24826025489064, 10)
loss 385.2079772949219
Current State,action,reward,Response time,Next State:  (10, 10.24826025489064) 3 19.0 921.2700698 (10.276491935146446, 11)
loss 383.13189697265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 382.73480224609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 926.9912719726562
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 391.0769958496094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 912.638916015625
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 0 21.0 913.272125304 (10.333617326102203, 9)
loss 381.26715087890625
Current State,action,reward,Response time,Next State:  (9, 10.333617326102203) 1 22.0 964.03141062 (10.390165524255663, 8)
loss 385.98712158203125
Current State,action,reward,Response time,Next State:  (8, 10.390165524255663) 3 21.0 978.294574081 (10.425974763084863, 9)
loss 939.1292114257812
Current State,action,reward,Response time,Next State:  (9, 10.425974763084863) 2 21.0 968.865866662 (10.546025383098053, 9)
loss 942.4144287109375
Current State,action,reward,Response time,Next State:  (9, 10.546025383098053) 3 20.0 975.14992417 (10.655373370049301, 10)
loss 381.1834716796875
Action +2 not possible so Scaled up by 1
Current State,action,reward,Response time,Next State:  (10, 10.655373370049301) 4 19.0 942.865015335 (10.624473674922116, 11)
loss 925.5695190429688
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 393.5224304199219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 385.1266784667969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 389.8159484863281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 389.1126403808594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 922.2957763671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 384.6219787597656
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 2 19.0 992.681522335 (12.19918626616789, 11)
loss 386.22442626953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 380.57806396484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 2021.8555908203125
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 1 20.0 1063.96010023 (13.649658108197247, 10)
loss 930.88232421875
Current State,action,reward,Response time,Next State:  (10, 13.649658108197247) 3 19.0 1101.69413046 (14.283719188889453, 11)
loss 385.95672607421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 1438.906005859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 425.3849792480469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 1419.8834228515625
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 4 19.0 1204.9600972 (16.466876895473597, 11)
loss 2007.168212890625
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 4 19.0 1238.24711194 (16.871606159345866, 11)
loss 395.4201354980469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 386.4571838378906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 942.192626953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 380.7663879394531
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 0 21.0 1316.32685758 (18.385807405229915, 9)
loss 388.0069885253906
Current State,action,reward,Response time,Next State:  (9, 18.385807405229915) 2 21.0 1385.5238237 (18.671267839956315, 9)
loss 1454.1688232421875
Current State,action,reward,Response time,Next State:  (9, 18.671267839956315) 3 20.0 1400.46626871 (19.02839494033929, 10)
loss 388.15155029296875
Action +2 not possible so Scaled up by 1
Current State,action,reward,Response time,Next State:  (10, 19.02839494033929) 4 19.0 1387.00434183 (19.286321916040979, 11)
loss 381.07366943359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 386.26708984375
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 4 19.0 1390.09363446 (19.213467265587269, 11)
loss 894.4619750976562
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 915.3944702148438
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 386.46173095703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 386.4219055175781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 385.97723388671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 428.3993835449219
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 4 19.0 1376.52055872 (18.668181536495972, 11)
loss 384.04290771484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 913.3974609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 378.8379821777344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 921.93798828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 383.24554443359375
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 1 20.0 1258.07554888 (16.237094554670044, 10)
loss 934.971435546875
Current State,action,reward,Response time,Next State:  (10, 16.237094554670044) 3 19.0 1238.94234737 (15.950694610794756, 11)
loss 1419.5155029296875
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 0 21.0 1210.97093797 (15.828704162850809, 9)
loss 914.3822631835938
Current State,action,reward,Response time,Next State:  (9, 15.828704162850809) 3 20.0 1251.67208827 (15.550833128512703, 10)
loss 964.1697998046875
Current State,action,reward,Response time,Next State:  (10, 15.550833128512703) 3 19.0 1202.54023315 (15.446694946204717, 11)
loss 377.5712585449219
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 4 19.0 1184.33851965 (15.750501603468638, 11)
loss 382.1846618652344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 918.0245361328125
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 1 20.0 1203.91462651 (15.829956988360925, 10)
loss 1479.595458984375
Current State,action,reward,Response time,Next State:  (10, 15.829956988360925) 0 22.0 1217.34610485 (15.892373986997768, 8)
loss 381.8888854980469
Current State,action,reward,Response time,Next State:  (8, 15.892373986997768) 0 24.0 1299.87001973 (15.954793861767499, 6)
loss 384.0632629394531
Current State,action,reward,Response time,Next State:  (6, 15.954793861767499) 0 25.6738869972 1452.63467243 (16.004586266677634, 4)
loss 384.066162109375
Current State,action,reward,Response time,Next State:  (4, 16.004586266677634) 2 25.5249670084 2003.26113003 (16.017694914042416, 4)
loss 385.8370361328125
Current State,action,reward,Response time,Next State:  (4, 16.017694914042416) 0 -234.059890506 2004.75032992 (15.947547279389703, 2)
loss 387.03472900390625
Action -2 not possible so Scaled down by 0
Current State,action,reward,Response time,Next State:  (2, 15.947547279389703) 1 -239.256693905 4620.59890506 (16.11465619633363, 2)
loss 384.0439453125
Action -2 not possible so Scaled down by 0
Current State,action,reward,Response time,Next State:  (2, 16.11465619633363) 1 -240.264968694 4672.56693905 (16.147078378791146, 2)
loss 380.6285400390625
Action -2 not possible so Scaled down by 0
Current State,action,reward,Response time,Next State:  (2, 16.147078378791146) 1 -242.820472693 4682.64968694 (16.229253414601111, 2)
loss 378.9169921875
Current State,action,reward,Response time,Next State:  (2, 16.229253414601111) 4 22.3732869054 4708.20472693 (16.295120821876548, 4)
loss 377.6227111816406
Current State,action,reward,Response time,Next State:  (4, 16.295120821876548) 3 25.0 2036.26713095 (16.667936385136993, 5)
loss 377.9057922363281
Current State,action,reward,Response time,Next State:  (5, 16.667936385136993) 3 24.0 1789.05181455 (16.836383524612351, 6)
loss 933.733154296875
Current State,action,reward,Response time,Next State:  (6, 16.836383524612351) 2 24.0 1510.94222459 (16.845818065953559, 6)
loss 955.2291259765625
Current State,action,reward,Response time,Next State:  (6, 16.845818065953559) 3 23.0 1511.56621672 (17.052961248403161, 7)
loss 378.6614685058594
Current State,action,reward,Response time,Next State:  (7, 17.052961248403161) 1 24.0 1438.51394887 (17.215992726625572, 6)
loss 380.6300048828125
############ Running episode number: 78  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 959.6181640625
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 2 19.0 1000.80824137 (11.786394321941378, 11)
loss 956.8392333984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 379.9936218261719
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 1 20.0 982.049698353 (11.469111876584304, 10)
loss 390.42779541015625
Current State,action,reward,Response time,Next State:  (10, 11.469111876584304) 3 19.0 986.02903554 (11.336751742492702, 11)
loss 385.7712707519531
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 2 19.0 967.160346038 (11.25610796929319, 11)
loss 385.1882019042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 1544.6429443359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 379.66650390625
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 0 21.0 949.137050055 (10.931193889570471, 9)
loss 384.6761779785156
Current State,action,reward,Response time,Next State:  (9, 10.931193889570471) 3 20.0 995.311594677 (10.816918347608043, 10)
loss 391.81878662109375
Current State,action,reward,Response time,Next State:  (10, 10.816918347608043) 3 19.0 951.434021987 (10.819208572963639, 11)
loss 389.42236328125
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 2 19.0 939.812260006 (10.768325938188134, 11)
loss 387.8708190917969
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 0 21.0 937.12351295 (10.772009508959538, 9)
loss 972.7290649414062
Current State,action,reward,Response time,Next State:  (9, 10.772009508959538) 0 23.0 986.979077927 (10.644925616761762, 7)
loss 391.1504821777344
Current State,action,reward,Response time,Next State:  (7, 10.644925616761762) 3 22.0 1039.18611617 (10.58735855349979, 8)
loss 931.6173095703125
Current State,action,reward,Response time,Next State:  (8, 10.58735855349979) 3 21.0 989.819480251 (10.552868829802469, 9)
loss 392.37957763671875
Current State,action,reward,Response time,Next State:  (9, 10.552868829802469) 4 19.0 975.508144832 (10.553846649940214, 11)
loss 379.779541015625
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 4 19.0 925.789969445 (10.489125480251131, 11)
loss 965.6090698242188
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 1 20.0 922.369964659 (10.448897752470936, 10)
loss 1517.3726806640625
Current State,action,reward,Response time,Next State:  (10, 10.448897752470936) 3 19.0 931.912703681 (10.433149880183072, 11)
loss 387.3209228515625
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 4 19.0 919.412094444 (10.44185150623065, 11)
loss 1506.17236328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 382.6344299316406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 977.41162109375
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 0 21.0 919.104778396 (10.388469398680568, 9)
loss 952.442138671875
Current State,action,reward,Response time,Next State:  (9, 10.388469398680568) 3 20.0 966.902645924 (10.344006106602812, 10)
loss 380.19879150390625
Current State,action,reward,Response time,Next State:  (10, 10.344006106602812) 3 19.0 926.348821567 (10.319026962956018, 11)
loss 380.2416076660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 933.4566040039062
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 375.9811096191406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 967.8519287109375
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 2 19.0 910.69972028 (10.335411397720526, 11)
loss 383.99432373046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 382.3847961425781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 937.023193359375
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 2 19.0 909.642131904 (10.276491935146446, 11)
loss 380.628662109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 384.1150207519531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 379.7839050292969
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 4 19.0 909.008683798 (10.369891240151098, 11)
loss 962.164794921875
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 0 21.0 916.069372847 (10.316955310454549, 9)
loss 383.8540344238281
Current State,action,reward,Response time,Next State:  (9, 10.316955310454549) 2 21.0 963.159236328 (10.333617326102203, 9)
loss 388.1009521484375
Current State,action,reward,Response time,Next State:  (9, 10.333617326102203) 3 20.0 964.03141062 (10.390165524255663, 10)
loss 377.1165771484375
Current State,action,reward,Response time,Next State:  (10, 10.390165524255663) 3 19.0 928.797305964 (10.425974763084863, 11)
loss 382.4900207519531
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 0 21.0 919.032945938 (10.546025383098053, 9)
loss 380.38525390625
Current State,action,reward,Response time,Next State:  (9, 10.546025383098053) 3 20.0 975.14992417 (10.655373370049301, 10)
loss 385.268310546875
Current State,action,reward,Response time,Next State:  (10, 10.655373370049301) 3 19.0 942.865015335 (10.624473674922116, 11)
loss 388.0032653808594
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 2 19.0 929.522052234 (10.771376986314287, 11)
loss 381.63397216796875
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 0 21.0 937.284736847 (10.924797168745895, 9)
loss 383.4557189941406
Current State,action,reward,Response time,Next State:  (9, 10.924797168745895) 3 20.0 994.97675791 (11.039747673816453, 10)
loss 964.9139404296875
Current State,action,reward,Response time,Next State:  (10, 11.039747673816453) 1 21.0 963.253801267 (11.271571944085663, 9)
loss 381.5497741699219
Current State,action,reward,Response time,Next State:  (9, 11.271571944085663) 2 21.0 1013.12870607 (11.670334358779868, 9)
loss 380.57196044921875
Current State,action,reward,Response time,Next State:  (9, 11.670334358779868) 3 20.0 1034.00195058 (11.819721938468785, 10)
loss 961.4063110351562
Action +2 not possible so Scaled up by 1
Current State,action,reward,Response time,Next State:  (10, 11.819721938468785) 4 19.0 1004.62682792 (12.19918626616789, 11)
loss 1511.708740234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 387.8053283691406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 379.91888427734375
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 1 20.0 1063.96010023 (13.649658108197247, 10)
loss 950.3106079101562
Current State,action,reward,Response time,Next State:  (10, 13.649658108197247) 3 19.0 1101.69413046 (14.283719188889453, 11)
loss 934.0890502929688
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 384.95440673828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 389.49127197265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 380.9515380859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 931.7157592773438
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 928.1738891601562
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 2 19.0 1259.63387034 (17.534967586021782, 11)
loss 956.9136962890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 386.5873107910156
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 2 19.0 1301.78496219 (17.944480812078613, 11)
loss 383.01055908203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 960.716064453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 1542.36376953125
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 2 19.0 1354.73183582 (19.02839494033929, 11)
loss 381.791748046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 383.7781066894531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 388.67999267578125
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 4 19.0 1390.09363446 (19.213467265587269, 11)
loss 378.3871765136719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 928.8495483398438
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 383.5368347167969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 388.6568603515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 384.9601135253906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 960.6812133789062
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 959.843505859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 386.87213134765625
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 4 19.0 1339.12370397 (17.82724819986867, 11)
loss 969.734619140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 926.4999389648438
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 387.9574279785156
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 2 19.0 1258.07554888 (16.237094554670044, 11)
loss 379.4270935058594
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 2 19.0 1226.10492247 (15.950694610794756, 11)
loss 390.53253173828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 382.8199157714844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 380.0665588378906
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 0 21.0 1189.84140354 (15.446694946204717, 9)
loss 380.8329162597656
Current State,action,reward,Response time,Next State:  (9, 15.446694946204717) 3 20.0 1231.67579099 (15.750501603468638, 10)
loss 939.58349609375
Current State,action,reward,Response time,Next State:  (10, 15.750501603468638) 3 19.0 1213.1314661 (15.817158911312735, 11)
loss 388.0220947265625
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 0 21.0 1203.91462651 (15.829956988360925, 9)
loss 381.31103515625
Current State,action,reward,Response time,Next State:  (9, 15.829956988360925) 3 20.0 1251.7376675 (15.892373986997768, 10)
loss 386.9462585449219
Current State,action,reward,Response time,Next State:  (10, 15.892373986997768) 3 19.0 1220.65695786 (15.954793861767499, 11)
loss 392.4962463378906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 384.2027587890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 928.7433471679688
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 2 19.0 1214.51137704 (15.947547279389703, 11)
loss 381.08966064453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 384.7283935546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 957.4448852539062
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 975.293212890625
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 1 20.0 1225.69057988 (16.295120821876548, 10)
loss 383.0653991699219
Current State,action,reward,Response time,Next State:  (10, 16.295120821876548) 3 19.0 1242.02029803 (16.667936385136993, 11)
loss 1505.3162841796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 389.87005615234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 396.1791687011719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 380.18670654296875
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 1 20.0 1269.21706044 (17.215992726625572, 10)
loss 382.4660339355469
############ Running episode number: 79  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 0 22.0 1029.06659875 (11.973514343585284, 8)
loss 952.1471557617188
Current State,action,reward,Response time,Next State:  (8, 11.973514343585284) 3 21.0 1070.83307124 (11.786394321941378, 9)
loss 955.876708984375
Current State,action,reward,Response time,Next State:  (9, 11.786394321941378) 2 21.0 1040.0771169 (11.61852219546234, 9)
loss 955.1591186523438
Current State,action,reward,Response time,Next State:  (9, 11.61852219546234) 3 20.0 1031.28983953 (11.469111876584304, 10)
loss 384.5692138671875
Current State,action,reward,Response time,Next State:  (10, 11.469111876584304) 0 22.0 986.02903554 (11.336751742492702, 8)
loss 385.1703186035156
Current State,action,reward,Response time,Next State:  (8, 11.336751742492702) 4 20.0 1033.61761156 (11.25610796929319, 10)
loss 386.7604064941406
Action +2 not possible so Scaled up by 1
Current State,action,reward,Response time,Next State:  (10, 11.25610796929319) 4 19.0 974.730436685 (11.027107764209074, 11)
loss 390.3994140625
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 0 21.0 950.798097136 (10.995673623987257, 9)
loss 378.3618469238281
Current State,action,reward,Response time,Next State:  (9, 10.995673623987257) 3 20.0 998.686790566 (10.931193889570471, 10)
loss 934.9557495117188
Current State,action,reward,Response time,Next State:  (10, 10.931193889570471) 0 22.0 957.495664348 (10.816918347608043, 8)
loss 389.5803527832031
Current State,action,reward,Response time,Next State:  (8, 10.816918347608043) 2 22.0 1003.23605536 (10.819208572963639, 8)
loss 391.71380615234375
Current State,action,reward,Response time,Next State:  (8, 10.819208572963639) 3 21.0 1003.36990711 (10.768325938188134, 9)
loss 379.1957702636719
Current State,action,reward,Response time,Next State:  (9, 10.768325938188134) 3 20.0 986.786261176 (10.772009508959538, 10)
loss 979.781982421875
Current State,action,reward,Response time,Next State:  (10, 10.772009508959538) 3 19.0 949.051873418 (10.644925616761762, 11)
loss 390.3070068359375
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 2 19.0 930.602776506 (10.58735855349979, 11)
loss 386.3804016113281
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 4 19.0 927.560809977 (10.552868829802469, 11)
loss 387.5482482910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 387.24090576171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 386.0750732421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 380.87030029296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 374.1724548339844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 391.6685791015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 380.80548095703125
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 4 19.0 916.124940439 (10.42733414151318, 11)
loss 389.126953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 385.3338317871094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 937.206787109375
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 4 19.0 914.701547126 (10.319026962956018, 11)
loss 955.7412719726562
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 4 19.0 913.381595845 (10.30224719189987, 11)
loss 377.5654602050781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 384.7772216796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 390.16943359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 959.8189086914062
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 1 20.0 914.247384359 (10.305649118067803, 10)
loss 394.31732177734375
Current State,action,reward,Response time,Next State:  (10, 10.305649118067803) 3 19.0 924.314209939 (10.24826025489064, 11)
loss 383.3398742675781
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 0 21.0 909.642131904 (10.276491935146446, 9)
loss 379.62542724609375
Current State,action,reward,Response time,Next State:  (9, 10.276491935146446) 3 20.0 961.041178317 (10.236991269871366, 10)
loss 1501.0601806640625
Current State,action,reward,Response time,Next State:  (10, 10.236991269871366) 3 19.0 920.672316722 (10.236272697871373, 11)
loss 386.36553955078125
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 2 19.0 909.008683798 (10.369891240151098, 11)
loss 380.76898193359375
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 0 21.0 916.069372847 (10.316955310454549, 9)
loss 389.8310241699219
Current State,action,reward,Response time,Next State:  (9, 10.316955310454549) 3 20.0 963.159236328 (10.333617326102203, 10)
loss 956.0977172851562
Current State,action,reward,Response time,Next State:  (10, 10.333617326102203) 3 19.0 925.797758139 (10.390165524255663, 11)
loss 383.1287841796875
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 2 19.0 917.140709305 (10.425974763084863, 11)
loss 389.3222351074219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 378.1194152832031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 2090.068359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 1519.8092041015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 384.81488037109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 390.2383728027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 385.7961730957031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 386.7199401855469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 385.3651428222656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 394.36590576171875
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 0 21.0 992.681522335 (12.19918626616789, 9)
loss 384.2677307128906
Current State,action,reward,Response time,Next State:  (9, 12.19918626616789) 3 20.0 1061.68473805 (12.501496275411796, 10)
loss 381.52740478515625
Current State,action,reward,Response time,Next State:  (10, 12.501496275411796) 0 22.0 1040.79092857 (13.168618569876575, 8)
loss 383.3575439453125
Current State,action,reward,Response time,Next State:  (8, 13.168618569876575) 4 20.0 1140.68069275 (13.649658108197247, 10)
loss 959.9014282226562
Current State,action,reward,Response time,Next State:  (10, 13.649658108197247) 3 19.0 1101.69413046 (14.283719188889453, 11)
loss 377.58746337890625
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 0 21.0 1122.88439768 (14.677479537099185, 9)
loss 382.60308837890625
Current State,action,reward,Response time,Next State:  (9, 14.677479537099185) 3 20.0 1191.41116041 (15.353965082180355, 10)
loss 393.0159912109375
Action +2 not possible so Scaled up by 1
Current State,action,reward,Response time,Next State:  (10, 15.353965082180355) 4 19.0 1192.09754638 (15.836943704090487, 11)
loss 953.0001220703125
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 1 20.0 1204.9600972 (16.466876895473597, 10)
loss 377.8817138671875
Current State,action,reward,Response time,Next State:  (10, 16.466876895473597) 3 19.0 1251.130943 (16.871606159345866, 11)
loss 385.2197570800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 960.2203369140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 959.981201171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 1527.58349609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 975.765869140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 386.81951904296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 969.0513916015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 2114.17578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 380.3915710449219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 379.62347412109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 971.0987548828125
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 2 19.0 1379.54110953 (19.385636054792762, 11)
loss 375.06744384765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 382.1593933105469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 392.6845397949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 390.5071716308594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 382.40545654296875
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 1 20.0 1354.56874896 (18.375894992990247, 10)
loss 384.74468994140625
Current State,action,reward,Response time,Next State:  (10, 18.375894992990247) 3 19.0 1352.39307459 (17.82724819986867, 11)
loss 951.6324462890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 952.1809692382812
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 384.8843688964844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 386.6120300292969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 388.30023193359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 380.90863037109375
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 1 20.0 1204.52470225 (15.550833128512703, 10)
loss 376.8686828613281
Current State,action,reward,Response time,Next State:  (10, 15.550833128512703) 1 21.0 1202.54023315 (15.446694946204717, 9)
loss 386.74761962890625
Current State,action,reward,Response time,Next State:  (9, 15.446694946204717) 3 20.0 1231.67579099 (15.750501603468638, 10)
loss 977.0907592773438
Current State,action,reward,Response time,Next State:  (10, 15.750501603468638) 2 20.0 1213.1314661 (15.817158911312735, 10)
loss 382.0085754394531
Current State,action,reward,Response time,Next State:  (10, 15.817158911312735) 0 22.0 1216.66724247 (15.829956988360925, 8)
loss 390.03338623046875
Current State,action,reward,Response time,Next State:  (8, 15.829956988360925) 3 21.0 1296.22207104 (15.892373986997768, 9)
loss 381.3269958496094
Current State,action,reward,Response time,Next State:  (9, 15.892373986997768) 0 23.0 1255.00488935 (15.954793861767499, 7)
loss 387.2889404296875
Current State,action,reward,Response time,Next State:  (7, 15.954793861767499) 1 24.0 1370.07974724 (16.004586266677634, 6)
loss 955.7181396484375
Current State,action,reward,Response time,Next State:  (6, 16.004586266677634) 3 23.0 1455.92789758 (16.017694914042416, 7)
loss 961.0401611328125
Current State,action,reward,Response time,Next State:  (7, 16.017694914042416) 4 21.0 1373.9995352 (15.947547279389703, 9)
loss 935.6414794921875
Current State,action,reward,Response time,Next State:  (9, 15.947547279389703) 3 20.0 1257.89293893 (16.11465619633363, 10)
loss 392.7951965332031
Action +2 not possible so Scaled up by 1
Current State,action,reward,Response time,Next State:  (10, 16.11465619633363) 4 19.0 1232.44771583 (16.147078378791146, 11)
loss 383.6195983886719
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 1 20.0 1221.34827555 (16.229253414601111, 10)
loss 969.3445434570312
Current State,action,reward,Response time,Next State:  (10, 16.229253414601111) 3 19.0 1238.52642122 (16.295120821876548, 11)
loss 393.4164123535156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 964.1932983398438
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 382.2629699707031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 377.8553466796875
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 2 19.0 1258.27117243 (17.052961248403161, 11)
loss 933.4459838867188
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 2 19.0 1269.21706044 (17.215992726625572, 11)
loss 965.6154174804688
############ Running episode number: 80  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 384.75677490234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 972.3438110351562
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 395.3309020996094
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 0 21.0 982.049698353 (11.469111876584304, 9)
loss 953.8433837890625
Current State,action,reward,Response time,Next State:  (9, 11.469111876584304) 2 21.0 1023.46894667 (11.336751742492702, 9)
loss 386.9151611328125
Current State,action,reward,Response time,Next State:  (9, 11.336751742492702) 3 20.0 1016.54054685 (11.25610796929319, 10)
loss 932.5604858398438
Current State,action,reward,Response time,Next State:  (10, 11.25610796929319) 3 19.0 974.730436685 (11.027107764209074, 11)
loss 385.1470947265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 389.05072021484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 979.1656494140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 385.6128234863281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 385.60595703125
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 0 21.0 939.812260006 (10.768325938188134, 9)
loss 379.3127136230469
Current State,action,reward,Response time,Next State:  (9, 10.768325938188134) 4 19.0 986.786261176 (10.772009508959538, 11)
loss 382.452392578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 953.212890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 927.4304809570312
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 0 21.0 927.560809977 (10.552868829802469, 9)
loss 976.8685302734375
Current State,action,reward,Response time,Next State:  (9, 10.552868829802469) 3 20.0 975.508144832 (10.553846649940214, 10)
loss 383.54644775390625
Current State,action,reward,Response time,Next State:  (10, 10.553846649940214) 3 19.0 937.479622653 (10.489125480251131, 11)
loss 381.6805114746094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 385.8794250488281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 394.29522705078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 387.42266845703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 392.9964294433594
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 2 19.0 916.124940439 (10.42733414151318, 11)
loss 389.914794921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 392.4765319824219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 391.0514831542969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 959.1316528320312
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 381.65277099609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 388.2283935546875
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 1 20.0 911.223233653 (10.268274366284802, 10)
loss 1519.51806640625
Current State,action,reward,Response time,Next State:  (10, 10.268274366284802) 3 19.0 922.331700166 (10.335411397720526, 11)
loss 381.1973876953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 386.97174072265625
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 0 21.0 912.67468196 (10.24826025489064, 9)
loss 1539.7298583984375
Current State,action,reward,Response time,Next State:  (9, 10.24826025489064) 3 20.0 959.563389179 (10.276491935146446, 10)
loss 384.5237731933594
Current State,action,reward,Response time,Next State:  (10, 10.276491935146446) 1 21.0 922.767593645 (10.236991269871366, 9)
loss 1499.055419921875
Current State,action,reward,Response time,Next State:  (9, 10.236991269871366) 3 20.0 958.973513426 (10.236272697871373, 10)
loss 384.0626220703125
Action +2 not possible so Scaled up by 1
Current State,action,reward,Response time,Next State:  (10, 10.236272697871373) 4 19.0 920.634200723 (10.369891240151098, 11)
loss 388.4391784667969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 926.7049560546875
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 0 21.0 913.272125304 (10.333617326102203, 9)
loss 972.394287109375
Current State,action,reward,Response time,Next State:  (9, 10.333617326102203) 4 19.0 964.03141062 (10.390165524255663, 11)
loss 397.7289733886719
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 2 19.0 917.140709305 (10.425974763084863, 11)
loss 380.5909423828125
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 4 19.0 919.032945938 (10.546025383098053, 11)
loss 935.4744873046875
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 2 19.0 925.376677007 (10.655373370049301, 11)
loss 948.5440063476562
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 1 20.0 931.154858096 (10.624473674922116, 10)
loss 376.62481689453125
Current State,action,reward,Response time,Next State:  (10, 10.624473674922116) 3 19.0 941.225969064 (10.771376986314287, 11)
loss 956.1408081054688
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 0 21.0 937.284736847 (10.924797168745895, 9)
loss 958.2634887695312
Current State,action,reward,Response time,Next State:  (9, 10.924797168745895) 1 22.0 994.97675791 (11.039747673816453, 8)
loss 389.19903564453125
Current State,action,reward,Response time,Next State:  (8, 11.039747673816453) 3 21.0 1016.25926965 (11.271571944085663, 9)
loss 391.3396301269531
Current State,action,reward,Response time,Next State:  (9, 11.271571944085663) 3 20.0 1013.12870607 (11.670334358779868, 10)
loss 382.27374267578125
Current State,action,reward,Response time,Next State:  (10, 11.670334358779868) 3 19.0 996.702699398 (11.819721938468785, 11)
loss 974.0641479492188
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 925.9326171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 385.0423278808594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 959.9091186523438
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 381.9617919921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 385.6369934082031
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 1 20.0 1122.88439768 (14.677479537099185, 10)
loss 381.144775390625
Current State,action,reward,Response time,Next State:  (10, 14.677479537099185) 0 22.0 1156.21398489 (15.353965082180355, 8)
loss 384.94329833984375
Current State,action,reward,Response time,Next State:  (8, 15.353965082180355) 3 21.0 1268.40282167 (15.836943704090487, 9)
loss 921.0596923828125
Current State,action,reward,Response time,Next State:  (9, 15.836943704090487) 3 20.0 1252.10338759 (16.466876895473597, 10)
loss 937.9190063476562
Current State,action,reward,Response time,Next State:  (10, 16.466876895473597) 3 19.0 1251.130943 (16.871606159345866, 11)
loss 383.36456298828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 972.830322265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 386.4349365234375
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 1 20.0 1301.78496219 (17.944480812078613, 10)
loss 1550.0333251953125
Current State,action,reward,Response time,Next State:  (10, 17.944480812078613) 3 19.0 1329.50910109 (18.385807405229915, 11)
loss 385.5250549316406
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 0 21.0 1339.64749699 (18.671267839956315, 9)
loss 382.4425048828125
Current State,action,reward,Response time,Next State:  (9, 18.671267839956315) 3 20.0 1400.46626871 (19.02839494033929, 10)
loss 388.6848449707031
Current State,action,reward,Response time,Next State:  (10, 19.02839494033929) 0 22.0 1387.00434183 (19.286321916040979, 8)
loss 929.5584106445312
Current State,action,reward,Response time,Next State:  (8, 19.286321916040979) 3 21.0 1498.22861069 (19.340464848017284, 9)
loss 384.8693542480469
Current State,action,reward,Response time,Next State:  (9, 19.340464848017284) 3 20.0 1435.4954296 (19.213467265587269, 10)
loss 389.7328186035156
Action +2 not possible so Scaled up by 1
Current State,action,reward,Response time,Next State:  (10, 19.213467265587269) 4 19.0 1396.82133527 (19.140765783401285, 11)
loss 388.5982666015625
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 0 21.0 1379.54110953 (19.385636054792762, 9)
loss 933.6356201171875
Current State,action,reward,Response time,Next State:  (9, 19.385636054792762) 2 21.0 1437.85991935 (19.223969507401588, 9)
loss 951.1242065429688
Current State,action,reward,Response time,Next State:  (9, 19.223969507401588) 4 19.0 1429.39747342 (19.25591252280865, 11)
loss 958.9874267578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 927.8705444335938
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 388.8468933105469
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 1 20.0 1354.56874896 (18.375894992990247, 10)
loss 382.97216796875
Current State,action,reward,Response time,Next State:  (10, 18.375894992990247) 2 20.0 1352.39307459 (17.82724819986867, 10)
loss 380.9023742675781
Current State,action,reward,Response time,Next State:  (10, 17.82724819986867) 3 19.0 1323.29060362 (17.229782241685768, 11)
loss 931.10986328125
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 4 19.0 1278.56065924 (16.84211602880065, 11)
loss 388.1495056152344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 388.1874084472656
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 4 19.0 1226.10492247 (15.950694610794756, 11)
loss 389.4882507324219
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 0 21.0 1210.97093797 (15.828704162850809, 9)
loss 390.6990051269531
Current State,action,reward,Response time,Next State:  (9, 15.828704162850809) 0 23.0 1251.67208827 (15.550833128512703, 7)
loss 937.0736083984375
Current State,action,reward,Response time,Next State:  (7, 15.550833128512703) 1 24.0 1344.9062349 (15.446694946204717, 6)
loss 378.08197021484375
Current State,action,reward,Response time,Next State:  (6, 15.446694946204717) 1 25.0 1419.0294644 (15.750501603468638, 5)
loss 395.6614685058594
Current State,action,reward,Response time,Next State:  (5, 15.750501603468638) 4 23.0 1704.75586919 (15.817158911312735, 7)
loss 964.5731811523438
Current State,action,reward,Response time,Next State:  (7, 15.817158911312735) 4 21.0 1361.50278706 (15.829956988360925, 9)
loss 389.7167663574219
Current State,action,reward,Response time,Next State:  (9, 15.829956988360925) 1 22.0 1251.7376675 (15.892373986997768, 8)
loss 1539.2474365234375
Current State,action,reward,Response time,Next State:  (8, 15.892373986997768) 1 23.0 1299.87001973 (15.954793861767499, 7)
loss 385.9649658203125
Current State,action,reward,Response time,Next State:  (7, 15.954793861767499) 2 23.0 1370.07974724 (16.004586266677634, 7)
loss 388.23858642578125
Current State,action,reward,Response time,Next State:  (7, 16.004586266677634) 3 22.0 1373.18264715 (16.017694914042416, 8)
loss 386.066162109375
Current State,action,reward,Response time,Next State:  (8, 16.017694914042416) 3 21.0 1307.19437562 (15.947547279389703, 9)
loss 929.4028930664062
Current State,action,reward,Response time,Next State:  (9, 15.947547279389703) 3 20.0 1257.89293893 (16.11465619633363, 10)
loss 384.696044921875
Action +2 not possible so Scaled up by 1
Current State,action,reward,Response time,Next State:  (10, 16.11465619633363) 4 19.0 1232.44771583 (16.147078378791146, 11)
loss 387.7709045410156
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 4 19.0 1221.34827555 (16.229253414601111, 11)
loss 382.068359375
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 0 21.0 1225.69057988 (16.295120821876548, 9)
loss 392.6573486328125
Current State,action,reward,Response time,Next State:  (9, 16.295120821876548) 1 22.0 1276.0866986 (16.667936385136993, 8)
loss 390.8000793457031
Current State,action,reward,Response time,Next State:  (8, 16.667936385136993) 3 21.0 1345.1976051 (16.836383524612351, 9)
loss 389.4674072265625
Current State,action,reward,Response time,Next State:  (9, 16.836383524612351) 3 20.0 1304.41912996 (16.845818065953559, 10)
loss 952.4235229492188
Current State,action,reward,Response time,Next State:  (10, 16.845818065953559) 3 19.0 1271.23153331 (17.052961248403161, 11)
loss 394.42333984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 384.72686767578125
############ Running episode number: 81  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 0 22.0 1029.06659875 (11.973514343585284, 8)
loss 381.73687744140625
Current State,action,reward,Response time,Next State:  (8, 11.973514343585284) 3 21.0 1070.83307124 (11.786394321941378, 9)
loss 383.21112060546875
Current State,action,reward,Response time,Next State:  (9, 11.786394321941378) 0 23.0 1040.0771169 (11.61852219546234, 7)
loss 394.4037170410156
Current State,action,reward,Response time,Next State:  (7, 11.61852219546234) 3 22.0 1099.85747227 (11.469111876584304, 8)
loss 392.5498046875
Current State,action,reward,Response time,Next State:  (8, 11.469111876584304) 3 21.0 1041.35337246 (11.336751742492702, 9)
loss 1514.9422607421875
Current State,action,reward,Response time,Next State:  (9, 11.336751742492702) 4 19.0 1016.54054685 (11.25610796929319, 11)
loss 383.1962890625
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 0 21.0 962.898956888 (11.027107764209074, 9)
loss 961.3360595703125
Current State,action,reward,Response time,Next State:  (9, 11.027107764209074) 2 21.0 1000.33221268 (10.995673623987257, 9)
loss 387.9680480957031
Current State,action,reward,Response time,Next State:  (9, 10.995673623987257) 4 19.0 998.686790566 (10.931193889570471, 11)
loss 387.72906494140625
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 1 20.0 945.729803224 (10.816918347608043, 10)
loss 393.4162292480469
Current State,action,reward,Response time,Next State:  (10, 10.816918347608043) 3 19.0 951.434021987 (10.819208572963639, 11)
loss 987.3587036132812
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 384.9805603027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 395.4560241699219
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 1 20.0 937.318160694 (10.644925616761762, 10)
loss 389.72821044921875
Current State,action,reward,Response time,Next State:  (10, 10.644925616761762) 1 21.0 942.310823749 (10.58735855349979, 9)
loss 384.1008605957031
Current State,action,reward,Response time,Next State:  (9, 10.58735855349979) 4 19.0 977.313511661 (10.552868829802469, 11)
loss 935.8443603515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 962.1192626953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 929.5464477539062
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 385.7036437988281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 390.7444152832031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 391.5783386230469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 961.306640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 382.4856872558594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 956.565673828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 392.22314453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 383.9883728027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 955.4121704101562
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 1 20.0 912.494916918 (10.278181486298042, 10)
loss 386.80340576171875
Current State,action,reward,Response time,Next State:  (10, 10.278181486298042) 3 19.0 922.857214352 (10.268274366284802, 11)
loss 935.5401611328125
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 1 20.0 910.69972028 (10.335411397720526, 10)
loss 385.11700439453125
Current State,action,reward,Response time,Next State:  (10, 10.335411397720526) 3 19.0 925.892923039 (10.305649118067803, 11)
loss 392.9214782714844
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 2 19.0 912.67468196 (10.24826025489064, 11)
loss 934.9772338867188
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 0 21.0 909.642131904 (10.276491935146446, 9)
loss 382.69464111328125
Current State,action,reward,Response time,Next State:  (9, 10.276491935146446) 3 20.0 961.041178317 (10.236991269871366, 10)
loss 957.8718872070312
Action +2 not possible so Scaled up by 1
Current State,action,reward,Response time,Next State:  (10, 10.236991269871366) 4 19.0 920.672316722 (10.236272697871373, 11)
loss 927.231201171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 392.98638916015625
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 1 20.0 916.069372847 (10.316955310454549, 10)
loss 929.30810546875
Current State,action,reward,Response time,Next State:  (10, 10.316955310454549) 2 20.0 924.913936648 (10.333617326102203, 10)
loss 394.93939208984375
Current State,action,reward,Response time,Next State:  (10, 10.333617326102203) 3 19.0 925.797758139 (10.390165524255663, 11)
loss 963.1591796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 967.1790161132812
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 965.7008056640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 386.4900207519531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 393.9613342285156
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 1 20.0 929.522052234 (10.771376986314287, 10)
loss 385.8524475097656
Current State,action,reward,Response time,Next State:  (10, 10.771376986314287) 3 19.0 949.018321829 (10.924797168745895, 11)
loss 931.3283081054688
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 386.73602294921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 387.8116455078125
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 4 19.0 963.716106332 (11.670334358779868, 11)
loss 976.8114624023438
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 382.47796630859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 391.2596435546875
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 2 19.0 1012.73322757 (12.501496275411796, 11)
loss 387.3861389160156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 949.4512329101562
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 0 21.0 1063.96010023 (13.649658108197247, 9)
loss 378.97674560546875
Current State,action,reward,Response time,Next State:  (9, 13.649658108197247) 3 20.0 1137.6097809 (14.283719188889453, 10)
loss 961.840576171875
Current State,action,reward,Response time,Next State:  (10, 14.283719188889453) 2 20.0 1135.32732476 (14.677479537099185, 10)
loss 387.6667175292969
Current State,action,reward,Response time,Next State:  (10, 14.677479537099185) 3 19.0 1156.21398489 (15.353965082180355, 11)
loss 382.2900695800781
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 1 20.0 1179.43847566 (15.836943704090487, 10)
loss 956.7036743164062
Current State,action,reward,Response time,Next State:  (10, 15.836943704090487) 1 21.0 1217.71670884 (16.466876895473597, 9)
loss 924.3458251953125
Current State,action,reward,Response time,Next State:  (9, 16.466876895473597) 3 20.0 1285.07728144 (16.871606159345866, 10)
loss 930.8080444335938
Current State,action,reward,Response time,Next State:  (10, 16.871606159345866) 3 19.0 1272.5994393 (17.534967586021782, 11)
loss 1503.01171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 387.3718566894531
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 2 19.0 1301.78496219 (17.944480812078613, 11)
loss 973.8768920898438
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 0 21.0 1316.32685758 (18.385807405229915, 9)
loss 956.294921875
Current State,action,reward,Response time,Next State:  (9, 18.385807405229915) 3 20.0 1385.5238237 (18.671267839956315, 10)
loss 385.71014404296875
Current State,action,reward,Response time,Next State:  (10, 18.671267839956315) 3 19.0 1368.06085906 (19.02839494033929, 11)
loss 974.717041015625
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 4 19.0 1373.60319427 (19.286321916040979, 11)
loss 390.68408203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 390.38836669921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 961.1412963867188
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 379.4130859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 946.231201171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 1516.947998046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 975.7372436523438
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 1 20.0 1385.62570908 (19.08360399753829, 10)
loss 930.6564331054688
Current State,action,reward,Response time,Next State:  (10, 19.08360399753829) 2 20.0 1389.93285614 (18.668181536495972, 10)
loss 386.87823486328125
Current State,action,reward,Response time,Next State:  (10, 18.668181536495972) 3 19.0 1367.89714889 (18.375894992990247, 11)
loss 385.15753173828125
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 0 21.0 1339.12370397 (17.82724819986867, 9)
loss 980.2496337890625
Current State,action,reward,Response time,Next State:  (9, 17.82724819986867) 3 20.0 1356.28600579 (17.229782241685768, 10)
loss 940.0673217773438
Current State,action,reward,Response time,Next State:  (10, 17.229782241685768) 3 19.0 1291.59856437 (16.84211602880065, 11)
loss 383.4942626953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 384.2207336425781
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 2 19.0 1226.10492247 (15.950694610794756, 11)
loss 388.6001281738281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 387.29168701171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 393.035400390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 929.118896484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 394.9581298828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 974.8485717773438
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 388.35418701171875
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 4 19.0 1204.59090422 (15.892373986997768, 11)
loss 970.9176025390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 385.8406982421875
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 2 19.0 1211.18755114 (16.004586266677634, 11)
loss 963.7334594726562
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 386.3490905761719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 1549.52294921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 383.0862731933594
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 4 19.0 1219.63501821 (16.147078378791146, 11)
loss 390.99444580078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 391.4587707519531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 972.45068359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 392.3414306640625
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 4 19.0 1248.87152463 (16.836383524612351, 11)
loss 384.0087890625
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 0 21.0 1257.77263112 (16.845818065953559, 9)
loss 387.1352844238281
Current State,action,reward,Response time,Next State:  (9, 16.845818065953559) 1 22.0 1304.91298164 (17.052961248403161, 8)
loss 390.53167724609375
Current State,action,reward,Response time,Next State:  (8, 17.052961248403161) 0 24.0 1367.70030431 (17.215992726625572, 6)
loss 383.93359375
############ Running episode number: 82  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 387.2247009277344
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 0 21.0 1000.80824137 (11.786394321941378, 9)
loss 400.3019104003906
Current State,action,reward,Response time,Next State:  (9, 11.786394321941378) 3 20.0 1040.0771169 (11.61852219546234, 10)
loss 382.3130798339844
Current State,action,reward,Response time,Next State:  (10, 11.61852219546234) 3 19.0 993.95437024 (11.469111876584304, 11)
loss 1548.30224609375
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 2 19.0 974.154538113 (11.336751742492702, 11)
loss 386.231201171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 931.0479736328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 958.962158203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 389.9722900390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 920.5845336914062
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 386.97381591796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 379.9536437988281
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 4 19.0 939.812260006 (10.768325938188134, 11)
loss 962.6676025390625
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 0 21.0 937.12351295 (10.772009508959538, 9)
loss 384.7341613769531
Current State,action,reward,Response time,Next State:  (9, 10.772009508959538) 3 20.0 986.979077927 (10.644925616761762, 10)
loss 386.67877197265625
Current State,action,reward,Response time,Next State:  (10, 10.644925616761762) 1 21.0 942.310823749 (10.58735855349979, 9)
loss 965.4338989257812
Current State,action,reward,Response time,Next State:  (9, 10.58735855349979) 4 19.0 977.313511661 (10.552868829802469, 11)
loss 386.42034912109375
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 4 19.0 925.738299342 (10.553846649940214, 11)
loss 393.13287353515625
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 1 20.0 925.789969445 (10.489125480251131, 10)
loss 386.2774353027344
Current State,action,reward,Response time,Next State:  (10, 10.489125480251131) 3 19.0 934.046546974 (10.448897752470936, 11)
loss 380.9162292480469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 378.2930908203125
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 4 19.0 919.412094444 (10.44185150623065, 11)
loss 387.3024597167969
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 1 20.0 919.871906942 (10.370942817486826, 10)
loss 958.0123901367188
Current State,action,reward,Response time,Next State:  (10, 10.370942817486826) 3 19.0 927.777654938 (10.42733414151318, 11)
loss 382.7607727050781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 376.7665710449219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 377.96612548828125
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 1 20.0 914.701547126 (10.319026962956018, 10)
loss 394.1403503417969
Current State,action,reward,Response time,Next State:  (10, 10.319026962956018) 1 21.0 925.023825574 (10.30224719189987, 9)
loss 961.2457275390625
Current State,action,reward,Response time,Next State:  (9, 10.30224719189987) 3 20.0 962.389338906 (10.278181486298042, 10)
loss 383.93914794921875
Current State,action,reward,Response time,Next State:  (10, 10.278181486298042) 1 21.0 922.857214352 (10.268274366284802, 9)
loss 391.141357421875
Current State,action,reward,Response time,Next State:  (9, 10.268274366284802) 3 20.0 960.611029141 (10.335411397720526, 10)
loss 963.098388671875
Current State,action,reward,Response time,Next State:  (10, 10.335411397720526) 2 20.0 925.892923039 (10.305649118067803, 10)
loss 974.626220703125
Current State,action,reward,Response time,Next State:  (10, 10.305649118067803) 3 19.0 924.314209939 (10.24826025489064, 11)
loss 929.5305786132812
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 387.125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 967.0376586914062
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 2 19.0 909.046654676 (10.236272697871373, 11)
loss 385.8101806640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 932.05029296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 377.3692932128906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 380.85931396484375
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 0 21.0 914.152581784 (10.390165524255663, 9)
loss 955.4130859375
Current State,action,reward,Response time,Next State:  (9, 10.390165524255663) 1 22.0 966.991429728 (10.425974763084863, 8)
loss 384.52752685546875
Current State,action,reward,Response time,Next State:  (8, 10.425974763084863) 3 21.0 980.387437704 (10.546025383098053, 9)
loss 386.13616943359375
Current State,action,reward,Response time,Next State:  (9, 10.546025383098053) 2 21.0 975.14992417 (10.655373370049301, 9)
loss 960.6814575195312
Current State,action,reward,Response time,Next State:  (9, 10.655373370049301) 3 20.0 980.873751654 (10.624473674922116, 10)
loss 925.61865234375
Current State,action,reward,Response time,Next State:  (10, 10.624473674922116) 3 19.0 941.225969064 (10.771376986314287, 11)
loss 380.8557434082031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 383.3067321777344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 1534.8470458984375
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 0 21.0 951.466016946 (11.271571944085663, 9)
loss 383.33868408203125
Current State,action,reward,Response time,Next State:  (9, 11.271571944085663) 3 20.0 1013.12870607 (11.670334358779868, 10)
loss 960.505615234375
Current State,action,reward,Response time,Next State:  (10, 11.670334358779868) 2 20.0 996.702699398 (11.819721938468785, 10)
loss 384.0351257324219
Current State,action,reward,Response time,Next State:  (10, 11.819721938468785) 1 21.0 1004.62682792 (12.19918626616789, 9)
loss 390.9051208496094
Current State,action,reward,Response time,Next State:  (9, 12.19918626616789) 0 23.0 1061.68473805 (12.501496275411796, 7)
loss 936.2944946289062
Current State,action,reward,Response time,Next State:  (7, 12.501496275411796) 2 23.0 1154.88153049 (13.168618569876575, 7)
loss 382.7305603027344
Current State,action,reward,Response time,Next State:  (7, 13.168618569876575) 3 22.0 1196.45441106 (13.649658108197247, 8)
loss 383.8003234863281
Current State,action,reward,Response time,Next State:  (8, 13.649658108197247) 1 23.0 1168.79494995 (14.283719188889453, 7)
loss 928.4796142578125
Current State,action,reward,Response time,Next State:  (7, 14.283719188889453) 1 24.0 1265.94383637 (14.677479537099185, 6)
loss 393.7982482910156
Current State,action,reward,Response time,Next State:  (6, 14.677479537099185) 0 26.0 1368.15424492 (15.353965082180355, 4)
loss 956.1433715820312
Current State,action,reward,Response time,Next State:  (4, 15.353965082180355) 1 -74.5094512724 1929.34770724 (15.836943704090487, 3)
loss 381.5985412597656
Current State,action,reward,Response time,Next State:  (3, 15.836943704090487) 1 -250.210159225 3015.09451272 (16.466876895473597, 2)
loss 935.5088500976562
Current State,action,reward,Response time,Next State:  (2, 16.466876895473597) 3 -94.8582850434 4782.10159225 (16.871606159345866, 3)
loss 470.15423583984375
Action -2 not possible so Scaled down by 1
Current State,action,reward,Response time,Next State:  (3, 16.871606159345866) 0 -283.425965015 3218.58285043 (17.534967586021782, 2)
loss 379.386474609375
Current State,action,reward,Response time,Next State:  (2, 17.534967586021782) 3 -110.546347383 5114.25965015 (17.669285735563751, 3)
loss 1768.9105224609375
Current State,action,reward,Response time,Next State:  (3, 17.669285735563751) 3 3.63583334771 3375.46347383 (17.944480812078613, 4)
loss 1191.55859375
Current State,action,reward,Response time,Next State:  (4, 17.944480812078613) 1 -124.638267193 2223.64166652 (18.385807405229915, 3)
loss 1195.4085693359375
Current State,action,reward,Response time,Next State:  (3, 18.385807405229915) 3 -4.62078634849 3516.38267193 (18.671267839956315, 4)
loss 374.10662841796875
Current State,action,reward,Response time,Next State:  (4, 18.671267839956315) 0 -329.869018402 2306.20786348 (19.02839494033929, 2)
loss 390.410400390625
Current State,action,reward,Response time,Next State:  (2, 19.02839494033929) 3 -142.34879684 5578.69018402 (19.286321916040979, 3)
loss 955.5286254882812
Action -2 not possible so Scaled down by 1
Current State,action,reward,Response time,Next State:  (3, 19.286321916040979) 0 -339.573862324 3693.4879684 (19.340464848017284, 2)
loss 2501.828125
Current State,action,reward,Response time,Next State:  (2, 19.340464848017284) 2 -335.624453279 5675.73862324 (19.213467265587269, 2)
loss 390.1373291015625
Current State,action,reward,Response time,Next State:  (2, 19.213467265587269) 3 -139.486126476 5636.24453279 (19.140765783401285, 3)
loss 401.5711669921875
Current State,action,reward,Response time,Next State:  (3, 19.140765783401285) 2 -144.302020223 3664.86126476 (19.385636054792762, 3)
loss 696.066162109375
Current State,action,reward,Response time,Next State:  (3, 19.385636054792762) 3 -10.8997199267 3713.02020223 (19.223969507401588, 4)
loss 1121.2401123046875
Current State,action,reward,Response time,Next State:  (4, 19.223969507401588) 3 22.3159190023 2368.99719927 (19.25591252280865, 5)
loss 673.2078247070312
Current State,action,reward,Response time,Next State:  (5, 19.25591252280865) 3 24.0 2026.84080998 (19.08360399753829, 6)
loss 791.045654296875
Current State,action,reward,Response time,Next State:  (6, 19.08360399753829) 3 23.0 1659.57137766 (18.668181536495972, 7)
loss 387.6094665527344
Current State,action,reward,Response time,Next State:  (7, 18.668181536495972) 1 24.0 1539.16919707 (18.375894992990247, 6)
loss 2133.090576171875
Current State,action,reward,Response time,Next State:  (6, 18.375894992990247) 3 23.0 1612.76413679 (17.82724819986867, 7)
loss 440.79876708984375
Current State,action,reward,Response time,Next State:  (7, 17.82724819986867) 3 22.0 1486.76498054 (17.229782241685768, 8)
loss 449.1026306152344
Current State,action,reward,Response time,Next State:  (8, 17.229782241685768) 3 21.0 1378.03457101 (16.84211602880065, 9)
loss 391.2143859863281
Current State,action,reward,Response time,Next State:  (9, 16.84211602880065) 1 22.0 1304.71919827 (16.237094554670044, 8)
loss 1539.9400634765625
Current State,action,reward,Response time,Next State:  (8, 16.237094554670044) 3 21.0 1320.01714264 (15.950694610794756, 9)
loss 1533.298095703125
Current State,action,reward,Response time,Next State:  (9, 15.950694610794756) 0 23.0 1258.0576862 (15.828704162850809, 7)
loss 388.64398193359375
Current State,action,reward,Response time,Next State:  (7, 15.828704162850809) 4 21.0 1362.22224939 (15.550833128512703, 9)
loss 2527.2724609375
Current State,action,reward,Response time,Next State:  (9, 15.550833128512703) 3 20.0 1237.12691092 (15.446694946204717, 10)
loss 2473.019287109375
Current State,action,reward,Response time,Next State:  (10, 15.446694946204717) 3 19.0 1197.01631782 (15.750501603468638, 11)
loss 397.5871276855469
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 0 21.0 1200.39231205 (15.817158911312735, 9)
loss 973.3028564453125
Current State,action,reward,Response time,Next State:  (9, 15.817158911312735) 1 22.0 1251.06775133 (15.829956988360925, 8)
loss 1733.1851806640625
Current State,action,reward,Response time,Next State:  (8, 15.829956988360925) 3 21.0 1296.22207104 (15.892373986997768, 9)
loss 1130.7679443359375
Current State,action,reward,Response time,Next State:  (9, 15.892373986997768) 3 20.0 1255.00488935 (15.954793861767499, 10)
loss 3464.5712890625
Action +2 not possible so Scaled up by 1
Current State,action,reward,Response time,Next State:  (10, 15.954793861767499) 4 19.0 1223.96796344 (16.004586266677634, 11)
loss 387.568359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 391.0982971191406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 1774.4954833984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 935.2539672851562
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 2 19.0 1219.63501821 (16.147078378791146, 11)
loss 514.3438720703125
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 2 19.0 1221.34827555 (16.229253414601111, 11)
loss 1561.286376953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 1045.7186279296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 2858.414794921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 395.74700927734375
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 2 19.0 1257.77263112 (16.845818065953559, 11)
loss 975.849365234375
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 4 19.0 1258.27117243 (17.052961248403161, 11)
loss 1123.927490234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 401.6582946777344
############ Running episode number: 83  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 2 20.0 1029.06659875 (11.973514343585284, 10)
loss 963.61376953125
Action +2 not possible so Scaled up by 1
Current State,action,reward,Response time,Next State:  (10, 11.973514343585284) 4 19.0 1012.7846064 (11.786394321941378, 11)
loss 2583.4794921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 977.509765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 378.4851989746094
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 4 19.0 974.154538113 (11.336751742492702, 11)
loss 380.6427917480469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 391.6364440917969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 382.67230224609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 2628.320068359375
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 2 19.0 949.137050055 (10.931193889570471, 11)
loss 595.996826171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 389.8661804199219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 2308.5087890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 1468.1939697265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 2071.7529296875
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 2 19.0 937.318160694 (10.644925616761762, 11)
loss 387.5372314453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 583.0723266601562
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 1335.9119873046875
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 0 21.0 925.738299342 (10.553846649940214, 9)
loss 2086.989501953125
Current State,action,reward,Response time,Next State:  (9, 10.553846649940214) 1 22.0 975.559328891 (10.489125480251131, 8)
loss 3083.241455078125
Current State,action,reward,Response time,Next State:  (8, 10.489125480251131) 4 20.0 984.078268423 (10.448897752470936, 10)
loss 1555.035400390625
Current State,action,reward,Response time,Next State:  (10, 10.448897752470936) 3 19.0 931.912703681 (10.433149880183072, 11)
loss 3366.70458984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 1936.5606689453125
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 4 19.0 919.871906942 (10.370942817486826, 11)
loss 1263.46337890625
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 4 19.0 916.124940439 (10.42733414151318, 11)
loss 733.6944580078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 1567.7916259765625
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 4 19.0 917.051082408 (10.344006106602812, 11)
loss 1508.4827880859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 1117.4674072265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 581.3952026367188
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 384.7067565917969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 385.6659240722656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 386.8079833984375
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 2 19.0 914.247384359 (10.305649118067803, 11)
loss 381.02099609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 586.1423950195312
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 1275.2484130859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 1200.5408935546875
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 4 19.0 909.046654676 (10.236272697871373, 11)
loss 1014.7149658203125
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 0 21.0 909.008683798 (10.369891240151098, 9)
loss 1512.00439453125
Current State,action,reward,Response time,Next State:  (9, 10.369891240151098) 1 22.0 965.930171009 (10.316955310454549, 8)
loss 640.4351806640625
Current State,action,reward,Response time,Next State:  (8, 10.316955310454549) 4 20.0 974.015818144 (10.333617326102203, 10)
loss 384.1604309082031
Action +2 not possible so Scaled up by 1
Current State,action,reward,Response time,Next State:  (10, 10.333617326102203) 4 19.0 925.797758139 (10.390165524255663, 11)
loss 1181.6734619140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 1553.535888671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 1483.969482421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 1043.34033203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 1176.880859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 2037.7811279296875
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 1 20.0 937.284736847 (10.924797168745895, 10)
loss 943.45556640625
Current State,action,reward,Response time,Next State:  (10, 10.924797168745895) 3 19.0 957.1563561 (11.039747673816453, 11)
loss 1131.0548095703125
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 4 19.0 951.466016946 (11.271571944085663, 11)
loss 1118.718017578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 1788.006591796875
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 2 19.0 984.787563682 (11.819721938468785, 11)
loss 1575.4288330078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 384.02606201171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 1087.310546875
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 0 21.0 1028.70793389 (13.168618569876575, 9)
loss 2105.62841796875
Current State,action,reward,Response time,Next State:  (9, 13.168618569876575) 3 20.0 1112.429735 (13.649658108197247, 10)
loss 2163.571533203125
Action +2 not possible so Scaled up by 1
Current State,action,reward,Response time,Next State:  (10, 13.649658108197247) 4 19.0 1101.69413046 (14.283719188889453, 11)
loss 3359.35791015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 1469.8406982421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 2160.0244140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 1323.676025390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 928.25830078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 1531.7548828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 504.07489013671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 2815.157958984375
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 2 19.0 1301.78496219 (17.944480812078613, 11)
loss 1730.320068359375
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 4 19.0 1316.32685758 (18.385807405229915, 11)
loss 534.2024536132812
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 599.58349609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 1813.4307861328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 2248.978515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 506.4546203613281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 1185.49609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 1007.1762084960938
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 2277.14892578125
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 0 21.0 1392.48057747 (19.223969507401588, 9)
loss 531.37255859375
Current State,action,reward,Response time,Next State:  (9, 19.223969507401588) 3 20.0 1429.39747342 (19.25591252280865, 10)
loss 529.4362182617188
Current State,action,reward,Response time,Next State:  (10, 19.25591252280865) 3 19.0 1399.0728054 (19.08360399753829, 11)
loss 390.51788330078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 593.697021484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 540.1830444335938
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 2 19.0 1339.12370397 (17.82724819986867, 11)
loss 1502.830810546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 1530.2393798828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 933.6495971679688
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 1767.5584716796875
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 0 21.0 1226.10492247 (15.950694610794756, 9)
loss 2138.326416015625
Current State,action,reward,Response time,Next State:  (9, 15.950694610794756) 3 20.0 1258.0576862 (15.828704162850809, 10)
loss 2086.8701171875
Current State,action,reward,Response time,Next State:  (10, 15.828704162850809) 3 19.0 1217.27964986 (15.550833128512703, 11)
loss 1011.8798828125
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 2 19.0 1189.84140354 (15.446694946204717, 11)
loss 1474.5145263671875
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 1 20.0 1184.33851965 (15.750501603468638, 10)
loss 958.8456420898438
Current State,action,reward,Response time,Next State:  (10, 15.750501603468638) 3 19.0 1213.1314661 (15.817158911312735, 11)
loss 1774.0328369140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 1687.0784912109375
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 0 21.0 1204.59090422 (15.892373986997768, 9)
loss 509.43072509765625
Current State,action,reward,Response time,Next State:  (9, 15.892373986997768) 3 20.0 1255.00488935 (15.954793861767499, 10)
loss 582.8212280273438
Current State,action,reward,Response time,Next State:  (10, 15.954793861767499) 3 19.0 1223.96796344 (16.004586266677634, 11)
loss 927.6083984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 1675.1131591796875
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 2 19.0 1214.51137704 (15.947547279389703, 11)
loss 2659.021240234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 1011.431884765625
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 0 21.0 1219.63501821 (16.147078378791146, 9)
loss 386.33367919921875
Current State,action,reward,Response time,Next State:  (9, 16.147078378791146) 3 20.0 1268.3374073 (16.229253414601111, 10)
loss 3739.24267578125
Action +2 not possible so Scaled up by 1
Current State,action,reward,Response time,Next State:  (10, 16.229253414601111) 4 19.0 1238.52642122 (16.295120821876548, 11)
loss 931.7470092773438
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 0 21.0 1229.17115431 (16.667936385136993, 9)
loss 1165.75341796875
Current State,action,reward,Response time,Next State:  (9, 16.667936385136993) 0 23.0 1295.6017535 (16.836383524612351, 7)
loss 391.52838134765625
Current State,action,reward,Response time,Next State:  (7, 16.836383524612351) 3 22.0 1425.01753312 (16.845818065953559, 8)
loss 1548.2275390625
Current State,action,reward,Response time,Next State:  (8, 16.845818065953559) 3 21.0 1355.59386347 (17.052961248403161, 9)
loss 510.86767578125
Current State,action,reward,Response time,Next State:  (9, 17.052961248403161) 3 20.0 1315.75590499 (17.215992726625572, 10)
loss 1783.126708984375
############ Running episode number: 84  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 0 22.0 1029.06659875 (11.973514343585284, 8)
loss 392.1109619140625
Current State,action,reward,Response time,Next State:  (8, 11.973514343585284) 3 21.0 1070.83307124 (11.786394321941378, 9)
loss 1015.4557495117188
Current State,action,reward,Response time,Next State:  (9, 11.786394321941378) 3 20.0 1040.0771169 (11.61852219546234, 10)
loss 1203.8260498046875
Current State,action,reward,Response time,Next State:  (10, 11.61852219546234) 3 19.0 993.95437024 (11.469111876584304, 11)
loss 2116.554931640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 387.9648742675781
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 4 19.0 967.160346038 (11.25610796929319, 11)
loss 587.5989990234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 2651.1337890625
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 0 21.0 950.798097136 (10.995673623987257, 9)
loss 1085.159912109375
Current State,action,reward,Response time,Next State:  (9, 10.995673623987257) 3 20.0 998.686790566 (10.931193889570471, 10)
loss 976.5436401367188
Current State,action,reward,Response time,Next State:  (10, 10.931193889570471) 3 19.0 957.495664348 (10.816918347608043, 11)
loss 2654.269287109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 1094.82666015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 598.0008544921875
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 0 21.0 937.12351295 (10.772009508959538, 9)
loss 387.5447998046875
Current State,action,reward,Response time,Next State:  (9, 10.772009508959538) 3 20.0 986.979077927 (10.644925616761762, 10)
loss 389.283447265625
Current State,action,reward,Response time,Next State:  (10, 10.644925616761762) 3 19.0 942.310823749 (10.58735855349979, 11)
loss 1667.269287109375
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 0 21.0 927.560809977 (10.552868829802469, 9)
loss 393.0987854003906
Current State,action,reward,Response time,Next State:  (9, 10.552868829802469) 3 20.0 975.508144832 (10.553846649940214, 10)
loss 1475.6009521484375
Current State,action,reward,Response time,Next State:  (10, 10.553846649940214) 2 20.0 937.479622653 (10.489125480251131, 10)
loss 468.35162353515625
Current State,action,reward,Response time,Next State:  (10, 10.489125480251131) 0 22.0 934.046546974 (10.448897752470936, 8)
loss 658.0350341796875
Current State,action,reward,Response time,Next State:  (8, 10.448897752470936) 0 24.0 981.727167119 (10.433149880183072, 6)
loss 1624.9114990234375
Current State,action,reward,Response time,Next State:  (6, 10.433149880183072) 1 25.0 1087.43807584 (10.44185150623065, 5)
loss 1901.561767578125
Current State,action,reward,Response time,Next State:  (5, 10.44185150623065) 3 24.0 1216.98530774 (10.370942817486826, 6)
loss 970.404052734375
Current State,action,reward,Response time,Next State:  (6, 10.370942817486826) 3 23.0 1083.32375633 (10.42733414151318, 7)
loss 1251.392333984375
Current State,action,reward,Response time,Next State:  (7, 10.42733414151318) 4 21.0 1025.62652674 (10.388469398680568, 9)
loss 2856.448486328125
Current State,action,reward,Response time,Next State:  (9, 10.388469398680568) 1 22.0 966.902645924 (10.344006106602812, 8)
loss 521.6549072265625
Current State,action,reward,Response time,Next State:  (8, 10.344006106602812) 3 21.0 975.596796379 (10.319026962956018, 9)
loss 2635.03955078125
Current State,action,reward,Response time,Next State:  (9, 10.319026962956018) 3 20.0 963.267677113 (10.30224719189987, 10)
loss 1728.8558349609375
Current State,action,reward,Response time,Next State:  (10, 10.30224719189987) 3 19.0 924.133757854 (10.278181486298042, 11)
loss 2942.902587890625
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 1 20.0 911.223233653 (10.268274366284802, 10)
loss 1662.3790283203125
Current State,action,reward,Response time,Next State:  (10, 10.268274366284802) 3 19.0 922.331700166 (10.335411397720526, 11)
loss 1539.27294921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 498.57427978515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 590.7348022460938
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 0 21.0 909.642131904 (10.276491935146446, 9)
loss 1093.7608642578125
Current State,action,reward,Response time,Next State:  (9, 10.276491935146446) 3 20.0 961.041178317 (10.236991269871366, 10)
loss 381.9049987792969
Current State,action,reward,Response time,Next State:  (10, 10.236991269871366) 3 19.0 920.672316722 (10.236272697871373, 11)
loss 385.8063659667969
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 1 20.0 909.008683798 (10.369891240151098, 10)
loss 1334.83740234375
Current State,action,reward,Response time,Next State:  (10, 10.369891240151098) 1 21.0 927.721874973 (10.316955310454549, 9)
loss 509.6492919921875
Current State,action,reward,Response time,Next State:  (9, 10.316955310454549) 0 23.0 963.159236328 (10.333617326102203, 7)
loss 1480.1160888671875
Current State,action,reward,Response time,Next State:  (7, 10.333617326102203) 3 22.0 1019.78640117 (10.390165524255663, 8)
loss 395.17840576171875
Current State,action,reward,Response time,Next State:  (8, 10.390165524255663) 0 24.0 978.294574081 (10.425974763084863, 6)
loss 790.9335327148438
Current State,action,reward,Response time,Next State:  (6, 10.425974763084863) 3 23.0 1086.96352001 (10.546025383098053, 7)
loss 385.7691650390625
Current State,action,reward,Response time,Next State:  (7, 10.546025383098053) 0 25.0 1033.02297692 (10.655373370049301, 5)
loss 381.44647216796875
Current State,action,reward,Response time,Next State:  (5, 10.655373370049301) 3 24.0 1236.60417082 (10.624473674922116, 6)
loss 1520.8175048828125
Current State,action,reward,Response time,Next State:  (6, 10.624473674922116) 0 26.0 1100.09206058 (10.771376986314287, 4)
loss 1119.0308837890625
Current State,action,reward,Response time,Next State:  (4, 10.771376986314287) 3 25.0 1408.74558199 (10.924797168745895, 5)
loss 1556.35791015625
Current State,action,reward,Response time,Next State:  (5, 10.924797168745895) 2 25.0 1261.35942784 (11.039747673816453, 5)
loss 577.3460693359375
Current State,action,reward,Response time,Next State:  (5, 11.039747673816453) 3 24.0 1271.92133555 (11.271571944085663, 6)
loss 929.7723999023438
Current State,action,reward,Response time,Next State:  (6, 11.271571944085663) 3 23.0 1142.8905616 (11.670334358779868, 7)
loss 979.2516479492188
Current State,action,reward,Response time,Next State:  (7, 11.670334358779868) 3 22.0 1103.08623692 (11.819721938468785, 8)
loss 1475.824462890625
Current State,action,reward,Response time,Next State:  (8, 11.819721938468785) 1 23.0 1061.84470565 (12.19918626616789, 7)
loss 2054.311767578125
Current State,action,reward,Response time,Next State:  (7, 12.19918626616789) 3 22.0 1136.04255897 (12.501496275411796, 8)
loss 2591.421142578125
Current State,action,reward,Response time,Next State:  (8, 12.501496275411796) 4 20.0 1101.69086701 (13.168618569876575, 10)
loss 1326.4273681640625
Current State,action,reward,Response time,Next State:  (10, 13.168618569876575) 3 19.0 1076.17782493 (13.649658108197247, 11)
loss 2096.608642578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 1572.5806884765625
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 1 20.0 1122.88439768 (14.677479537099185, 10)
loss 2635.00048828125
Current State,action,reward,Response time,Next State:  (10, 14.677479537099185) 3 19.0 1156.21398489 (15.353965082180355, 11)
loss 2210.5986328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 1542.23193359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 1388.376708984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 629.1370239257812
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 1 20.0 1259.63387034 (17.534967586021782, 10)
loss 1380.52734375
Current State,action,reward,Response time,Next State:  (10, 17.534967586021782) 3 19.0 1307.78684385 (17.669285735563751, 11)
loss 2747.638427734375
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 1 20.0 1301.78496219 (17.944480812078613, 10)
loss 713.3793334960938
Current State,action,reward,Response time,Next State:  (10, 17.944480812078613) 3 19.0 1329.50910109 (18.385807405229915, 11)
loss 955.2872924804688
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 0 21.0 1339.64749699 (18.671267839956315, 9)
loss 1707.8526611328125
Current State,action,reward,Response time,Next State:  (9, 18.671267839956315) 0 23.0 1400.46626871 (19.02839494033929, 7)
loss 1219.9488525390625
Current State,action,reward,Response time,Next State:  (7, 19.02839494033929) 1 24.0 1561.61651886 (19.286321916040979, 6)
loss 1089.2955322265625
Current State,action,reward,Response time,Next State:  (6, 19.286321916040979) 1 21.5390334592 1672.97895956 (19.340464848017284, 5)
loss 1691.7060546875
Current State,action,reward,Response time,Next State:  (5, 19.340464848017284) 3 24.0 2034.60966541 (19.213467265587269, 6)
loss 2130.604248046875
Current State,action,reward,Response time,Next State:  (6, 19.213467265587269) 3 23.0 1668.16041811 (19.140765783401285, 7)
loss 584.48974609375
Current State,action,reward,Response time,Next State:  (7, 19.140765783401285) 3 22.0 1568.61910246 (19.385636054792762, 8)
loss 2346.086669921875
Current State,action,reward,Response time,Next State:  (8, 19.385636054792762) 0 24.0 1504.03300517 (19.223969507401588, 6)
loss 2700.01025390625
Current State,action,reward,Response time,Next State:  (6, 19.223969507401588) 0 -11.2626066047 1668.85502699 (19.25591252280865, 4)
loss 378.84307861328125
Current State,action,reward,Response time,Next State:  (4, 19.25591252280865) 2 -9.30510611419 2372.62606605 (19.08360399753829, 4)
loss 1504.3817138671875
Current State,action,reward,Response time,Next State:  (4, 19.08360399753829) 3 25.0 2353.05106114 (18.668181536495972, 5)
loss 1752.1728515625
Current State,action,reward,Response time,Next State:  (5, 18.668181536495972) 3 24.0 1972.8387816 (18.375894992990247, 6)
loss 472.3880920410156
Current State,action,reward,Response time,Next State:  (6, 18.375894992990247) 3 23.0 1612.76413679 (17.82724819986867, 7)
loss 1724.9349365234375
Current State,action,reward,Response time,Next State:  (7, 17.82724819986867) 4 21.0 1486.76498054 (17.229782241685768, 9)
loss 1138.2293701171875
Current State,action,reward,Response time,Next State:  (9, 17.229782241685768) 2 21.0 1325.01161138 (16.84211602880065, 9)
loss 385.5973205566406
Current State,action,reward,Response time,Next State:  (9, 16.84211602880065) 3 20.0 1304.71919827 (16.237094554670044, 10)
loss 1143.783447265625
Current State,action,reward,Response time,Next State:  (10, 16.237094554670044) 3 19.0 1238.94234737 (15.950694610794756, 11)
loss 387.4111022949219
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 0 21.0 1210.97093797 (15.828704162850809, 9)
loss 586.198486328125
Current State,action,reward,Response time,Next State:  (9, 15.828704162850809) 3 20.0 1251.67208827 (15.550833128512703, 10)
loss 545.6823120117188
Current State,action,reward,Response time,Next State:  (10, 15.550833128512703) 3 19.0 1202.54023315 (15.446694946204717, 11)
loss 1506.7799072265625
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 0 21.0 1184.33851965 (15.750501603468638, 9)
loss 1791.86181640625
Current State,action,reward,Response time,Next State:  (9, 15.750501603468638) 3 20.0 1247.57857022 (15.817158911312735, 10)
loss 390.97149658203125
Current State,action,reward,Response time,Next State:  (10, 15.817158911312735) 3 19.0 1216.66724247 (15.829956988360925, 11)
loss 2253.119140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 794.3690795898438
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 390.38702392578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 1546.7423095703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 387.4364013671875
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 2 19.0 1214.51137704 (15.947547279389703, 11)
loss 1773.9066162109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 1236.4410400390625
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 2 19.0 1219.63501821 (16.147078378791146, 11)
loss 784.898681640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 604.0470581054688
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 4 19.0 1225.69057988 (16.295120821876548, 11)
loss 2958.971435546875
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 2 19.0 1229.17115431 (16.667936385136993, 11)
loss 1743.058349609375
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 2 19.0 1248.87152463 (16.836383524612351, 11)
loss 1318.1552734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 471.0101318359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 1881.829345703125
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 4 19.0 1269.21706044 (17.215992726625572, 11)
loss 1478.4010009765625
############ Running episode number: 85  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 708.6705322265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 382.708740234375
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 4 19.0 990.920419923 (11.61852219546234, 11)
loss 387.63629150390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 390.423583984375
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 4 19.0 974.154538113 (11.336751742492702, 11)
loss 603.3232421875
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 2 19.0 967.160346038 (11.25610796929319, 11)
loss 1893.2626953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 1774.882080078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 1358.370361328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 2089.8408203125
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 4 19.0 945.729803224 (10.816918347608043, 11)
loss 1533.5435791015625
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 0 21.0 939.691239608 (10.819208572963639, 9)
loss 391.3680725097656
Current State,action,reward,Response time,Next State:  (9, 10.819208572963639) 3 20.0 989.449716 (10.768325938188134, 10)
loss 1510.272216796875
Current State,action,reward,Response time,Next State:  (10, 10.768325938188134) 3 19.0 948.856481751 (10.772009508959538, 11)
loss 955.0753173828125
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 2 19.0 937.318160694 (10.644925616761762, 11)
loss 1171.9371337890625
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 2 19.0 930.602776506 (10.58735855349979, 11)
loss 2065.68798828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 2197.781005859375
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 1 20.0 925.738299342 (10.553846649940214, 10)
loss 920.666748046875
Current State,action,reward,Response time,Next State:  (10, 10.553846649940214) 3 19.0 937.479622653 (10.489125480251131, 11)
loss 1339.2271728515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 465.75213623046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 1760.0892333984375
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 1 20.0 919.412094444 (10.44185150623065, 10)
loss 452.05474853515625
Current State,action,reward,Response time,Next State:  (10, 10.44185150623065) 3 19.0 931.538941947 (10.370942817486826, 11)
loss 389.23828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 1189.7828369140625
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 1 20.0 919.104778396 (10.388469398680568, 10)
loss 592.790283203125
Current State,action,reward,Response time,Next State:  (10, 10.388469398680568) 1 21.0 928.707336523 (10.344006106602812, 9)
loss 1397.647705078125
Current State,action,reward,Response time,Next State:  (9, 10.344006106602812) 3 20.0 964.575212011 (10.319026962956018, 10)
loss 1288.25927734375
Current State,action,reward,Response time,Next State:  (10, 10.319026962956018) 2 20.0 925.023825574 (10.30224719189987, 10)
loss 385.0511169433594
Current State,action,reward,Response time,Next State:  (10, 10.30224719189987) 2 20.0 924.133757854 (10.278181486298042, 10)
loss 591.5945434570312
Current State,action,reward,Response time,Next State:  (10, 10.278181486298042) 3 19.0 922.857214352 (10.268274366284802, 11)
loss 2055.03955078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 1752.69677734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 2119.931396484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 980.400634765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 1026.6331787109375
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 0 21.0 911.133954163 (10.236991269871366, 9)
loss 936.8551025390625
Current State,action,reward,Response time,Next State:  (9, 10.236991269871366) 0 23.0 958.973513426 (10.236272697871373, 7)
loss 1132.241943359375
Current State,action,reward,Response time,Next State:  (7, 10.236272697871373) 3 22.0 1013.72020216 (10.369891240151098, 8)
loss 1308.5687255859375
Current State,action,reward,Response time,Next State:  (8, 10.369891240151098) 1 23.0 977.109647703 (10.316955310454549, 7)
loss 381.9588928222656
Current State,action,reward,Response time,Next State:  (7, 10.316955310454549) 3 22.0 1018.74807882 (10.333617326102203, 8)
loss 980.8836059570312
Current State,action,reward,Response time,Next State:  (8, 10.333617326102203) 3 21.0 974.989626232 (10.390165524255663, 9)
loss 1686.43994140625
Current State,action,reward,Response time,Next State:  (9, 10.390165524255663) 2 21.0 966.991429728 (10.425974763084863, 9)
loss 398.53228759765625
Current State,action,reward,Response time,Next State:  (9, 10.425974763084863) 4 19.0 968.865866662 (10.546025383098053, 11)
loss 980.6939697265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 1194.8372802734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 388.19976806640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 507.2587890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 964.2074584960938
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 658.8853149414062
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 391.3620300292969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 1729.4747314453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 1781.874267578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 478.37030029296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 582.8232421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 973.030517578125
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 4 19.0 1063.96010023 (13.649658108197247, 11)
loss 479.7540283203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 400.59326171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 3236.659423828125
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 0 21.0 1143.69153516 (15.353965082180355, 9)
loss 1193.7423095703125
Current State,action,reward,Response time,Next State:  (9, 15.353965082180355) 2 21.0 1226.82184023 (15.836943704090487, 9)
loss 591.312744140625
Current State,action,reward,Response time,Next State:  (9, 15.836943704090487) 3 20.0 1252.10338759 (16.466876895473597, 10)
loss 1540.4423828125
Current State,action,reward,Response time,Next State:  (10, 16.466876895473597) 0 22.0 1251.130943 (16.871606159345866, 8)
loss 528.8836669921875
Current State,action,reward,Response time,Next State:  (8, 16.871606159345866) 3 21.0 1357.1010433 (17.534967586021782, 9)
loss 471.38671875
Current State,action,reward,Response time,Next State:  (9, 17.534967586021782) 3 20.0 1340.98655806 (17.669285735563751, 10)
loss 1537.47412109375
Current State,action,reward,Response time,Next State:  (10, 17.669285735563751) 2 20.0 1314.91162813 (17.944480812078613, 10)
loss 390.00970458984375
Current State,action,reward,Response time,Next State:  (10, 17.944480812078613) 0 22.0 1329.50910109 (18.385807405229915, 8)
loss 1193.4854736328125
Current State,action,reward,Response time,Next State:  (8, 18.385807405229915) 3 21.0 1445.59822471 (18.671267839956315, 9)
loss 511.4443054199219
Current State,action,reward,Response time,Next State:  (9, 18.671267839956315) 3 20.0 1400.46626871 (19.02839494033929, 10)
loss 391.3879699707031
Current State,action,reward,Response time,Next State:  (10, 19.02839494033929) 0 22.0 1387.00434183 (19.286321916040979, 8)
loss 382.083740234375
Current State,action,reward,Response time,Next State:  (8, 19.286321916040979) 3 21.0 1498.22861069 (19.340464848017284, 9)
loss 1534.558349609375
Current State,action,reward,Response time,Next State:  (9, 19.340464848017284) 0 23.0 1435.4954296 (19.213467265587269, 7)
loss 1007.3341674804688
Current State,action,reward,Response time,Next State:  (7, 19.213467265587269) 3 22.0 1573.14962117 (19.140765783401285, 8)
loss 959.88818359375
Current State,action,reward,Response time,Next State:  (8, 19.140765783401285) 3 21.0 1489.72161235 (19.385636054792762, 9)
loss 970.9718017578125
Current State,action,reward,Response time,Next State:  (9, 19.385636054792762) 3 20.0 1437.85991935 (19.223969507401588, 10)
loss 968.5
Current State,action,reward,Response time,Next State:  (10, 19.223969507401588) 2 20.0 1397.37841716 (19.25591252280865, 10)
loss 980.695068359375
Current State,action,reward,Response time,Next State:  (10, 19.25591252280865) 3 19.0 1399.0728054 (19.08360399753829, 11)
loss 1717.978515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 1189.2193603515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 1362.0257568359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 581.1792602539062
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 1 20.0 1310.13203606 (17.229782241685768, 10)
loss 1505.7139892578125
Current State,action,reward,Response time,Next State:  (10, 17.229782241685768) 3 19.0 1291.59856437 (16.84211602880065, 11)
loss 1918.3450927734375
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 2 19.0 1258.07554888 (16.237094554670044, 11)
loss 1475.1597900390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 2676.892822265625
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 2 19.0 1210.97093797 (15.828704162850809, 11)
loss 597.4804077148438
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 1817.8753662109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 1197.3758544921875
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 0 21.0 1184.33851965 (15.750501603468638, 9)
loss 2222.678466796875
Current State,action,reward,Response time,Next State:  (9, 15.750501603468638) 3 20.0 1247.57857022 (15.817158911312735, 10)
loss 2875.757080078125
Current State,action,reward,Response time,Next State:  (10, 15.817158911312735) 1 21.0 1216.66724247 (15.829956988360925, 9)
loss 872.0556030273438
Current State,action,reward,Response time,Next State:  (9, 15.829956988360925) 4 19.0 1251.7376675 (15.892373986997768, 11)
loss 1380.8408203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 2059.720947265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 1327.3619384765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 1477.6558837890625
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 0 21.0 1214.51137704 (15.947547279389703, 9)
loss 1582.285888671875
Current State,action,reward,Response time,Next State:  (9, 15.947547279389703) 3 20.0 1257.89293893 (16.11465619633363, 10)
loss 543.3876953125
Current State,action,reward,Response time,Next State:  (10, 16.11465619633363) 2 20.0 1232.44771583 (16.147078378791146, 10)
loss 2324.840087890625
Current State,action,reward,Response time,Next State:  (10, 16.147078378791146) 1 21.0 1234.16752106 (16.229253414601111, 9)
loss 627.969970703125
Current State,action,reward,Response time,Next State:  (9, 16.229253414601111) 4 19.0 1272.63886489 (16.295120821876548, 11)
loss 388.5367126464844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 2457.343505859375
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 0 21.0 1248.87152463 (16.836383524612351, 9)
loss 400.4974060058594
Current State,action,reward,Response time,Next State:  (9, 16.836383524612351) 3 20.0 1304.41912996 (16.845818065953559, 10)
loss 1340.5821533203125
Current State,action,reward,Response time,Next State:  (10, 16.845818065953559) 3 19.0 1271.23153331 (17.052961248403161, 11)
loss 2124.9677734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 2738.0087890625
############ Running episode number: 86  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 1560.586181640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 390.2698669433594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 1662.2513427734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 956.5733032226562
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 2247.68798828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 1192.0616455078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 2185.225830078125
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 1 20.0 950.798097136 (10.995673623987257, 10)
loss 2108.69921875
Current State,action,reward,Response time,Next State:  (10, 10.995673623987257) 3 19.0 960.915933313 (10.931193889570471, 11)
loss 472.9185791015625
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 2 19.0 945.729803224 (10.816918347608043, 11)
loss 1416.956298828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 1086.73095703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 390.38079833984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 3464.080078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 1997.610595703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 2603.35791015625
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 1 20.0 927.560809977 (10.552868829802469, 10)
loss 1226.755126953125
Current State,action,reward,Response time,Next State:  (10, 10.552868829802469) 3 19.0 937.427755072 (10.553846649940214, 11)
loss 709.7285766601562
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 1869.117919921875
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 2 19.0 922.369964659 (10.448897752470936, 11)
loss 1478.7562255859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 3117.09375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 661.5731201171875
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 0 21.0 919.871906942 (10.370942817486826, 9)
loss 960.116455078125
Current State,action,reward,Response time,Next State:  (9, 10.370942817486826) 1 22.0 965.985215893 (10.42733414151318, 8)
loss 1137.3084716796875
Current State,action,reward,Response time,Next State:  (8, 10.42733414151318) 2 22.0 980.466886297 (10.388469398680568, 8)
loss 1711.1749267578125
Current State,action,reward,Response time,Next State:  (8, 10.388469398680568) 1 23.0 978.19544437 (10.344006106602812, 7)
loss 3252.365966796875
Current State,action,reward,Response time,Next State:  (7, 10.344006106602812) 3 22.0 1020.43379601 (10.319026962956018, 8)
loss 1475.937744140625
Current State,action,reward,Response time,Next State:  (8, 10.319026962956018) 3 21.0 974.136895449 (10.30224719189987, 9)
loss 390.0586242675781
Current State,action,reward,Response time,Next State:  (9, 10.30224719189987) 3 20.0 962.389338906 (10.278181486298042, 10)
loss 391.3297424316406
Current State,action,reward,Response time,Next State:  (10, 10.278181486298042) 3 19.0 922.857214352 (10.268274366284802, 11)
loss 471.6773986816406
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 4 19.0 910.69972028 (10.335411397720526, 11)
loss 598.4512939453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 1181.0791015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 509.8541259765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 1010.6273803710938
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 597.084228515625
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 1 20.0 909.046654676 (10.236272697871373, 10)
loss 793.7093505859375
Current State,action,reward,Response time,Next State:  (10, 10.236272697871373) 3 19.0 920.634200723 (10.369891240151098, 11)
loss 2300.023193359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 1475.297607421875
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 4 19.0 913.272125304 (10.333617326102203, 11)
loss 389.11553955078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 1107.504150390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 4490.654296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 709.760498046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 1056.9766845703125
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 1 20.0 931.154858096 (10.624473674922116, 10)
loss 1133.8983154296875
Current State,action,reward,Response time,Next State:  (10, 10.624473674922116) 1 21.0 941.225969064 (10.771376986314287, 9)
loss 595.0106201171875
Current State,action,reward,Response time,Next State:  (9, 10.771376986314287) 3 20.0 986.945968488 (10.924797168745895, 10)
loss 392.0029602050781
Current State,action,reward,Response time,Next State:  (10, 10.924797168745895) 3 19.0 957.1563561 (11.039747673816453, 11)
loss 930.3217163085938
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 389.06500244140625
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 2 19.0 963.716106332 (11.670334358779868, 11)
loss 1011.6710815429688
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 593.6029663085938
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 2091.080810546875
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 1 20.0 1012.73322757 (12.501496275411796, 10)
loss 959.8800659179688
Current State,action,reward,Response time,Next State:  (10, 12.501496275411796) 0 22.0 1040.79092857 (13.168618569876575, 8)
loss 594.2474975585938
Current State,action,reward,Response time,Next State:  (8, 13.168618569876575) 1 23.0 1140.68069275 (13.649658108197247, 7)
loss 2050.76171875
Current State,action,reward,Response time,Next State:  (7, 13.649658108197247) 4 21.0 1226.43122257 (14.283719188889453, 9)
loss 390.2768859863281
Current State,action,reward,Response time,Next State:  (9, 14.283719188889453) 3 20.0 1170.79974938 (14.677479537099185, 10)
loss 396.1331481933594
Current State,action,reward,Response time,Next State:  (10, 14.677479537099185) 0 22.0 1156.21398489 (15.353965082180355, 8)
loss 1192.75341796875
Current State,action,reward,Response time,Next State:  (8, 15.353965082180355) 3 21.0 1268.40282167 (15.836943704090487, 9)
loss 2223.301513671875
Current State,action,reward,Response time,Next State:  (9, 15.836943704090487) 3 20.0 1252.10338759 (16.466876895473597, 10)
loss 1020.1004638671875
Action +2 not possible so Scaled up by 1
Current State,action,reward,Response time,Next State:  (10, 16.466876895473597) 4 19.0 1251.130943 (16.871606159345866, 11)
loss 537.5762939453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 1198.235595703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 1980.7174072265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 2059.8095703125
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 0 21.0 1316.32685758 (18.385807405229915, 9)
loss 1664.1512451171875
Current State,action,reward,Response time,Next State:  (9, 18.385807405229915) 3 20.0 1385.5238237 (18.671267839956315, 10)
loss 1589.870849609375
Current State,action,reward,Response time,Next State:  (10, 18.671267839956315) 2 20.0 1368.06085906 (19.02839494033929, 10)
loss 959.7984619140625
Current State,action,reward,Response time,Next State:  (10, 19.02839494033929) 2 20.0 1387.00434183 (19.286321916040979, 10)
loss 478.9828796386719
Current State,action,reward,Response time,Next State:  (10, 19.286321916040979) 3 19.0 1400.68584406 (19.340464848017284, 11)
loss 385.62738037109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 739.9749145507812
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 1315.8634033203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 2309.9228515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 983.4754638671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 1202.697021484375
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 4 19.0 1385.62570908 (19.08360399753829, 11)
loss 962.6712036132812
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 576.5509033203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 2640.78173828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 1876.4866943359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 386.62060546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 2030.11181640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 396.3416442871094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 387.0397033691406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 1017.9169921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 589.9493408203125
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 1 20.0 1189.84140354 (15.446694946204717, 10)
loss 1267.321533203125
Current State,action,reward,Response time,Next State:  (10, 15.446694946204717) 3 19.0 1197.01631782 (15.750501603468638, 11)
loss 985.1382446289062
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 3365.21142578125
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 0 21.0 1203.91462651 (15.829956988360925, 9)
loss 1272.9266357421875
Current State,action,reward,Response time,Next State:  (9, 15.829956988360925) 3 20.0 1251.7376675 (15.892373986997768, 10)
loss 1598.20849609375
Current State,action,reward,Response time,Next State:  (10, 15.892373986997768) 3 19.0 1220.65695786 (15.954793861767499, 11)
loss 2161.235107421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 1319.532470703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 391.0278625488281
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 4 19.0 1214.51137704 (15.947547279389703, 11)
loss 396.4990539550781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 1583.8878173828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 1014.3035278320312
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 2344.95654296875
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 2 19.0 1225.69057988 (16.295120821876548, 11)
loss 1565.0316162109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 482.9350891113281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 466.3099670410156
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 1 20.0 1257.77263112 (16.845818065953559, 10)
loss 475.7234802246094
Current State,action,reward,Response time,Next State:  (10, 16.845818065953559) 2 20.0 1271.23153331 (17.052961248403161, 10)
loss 436.1376953125
Current State,action,reward,Response time,Next State:  (10, 17.052961248403161) 3 19.0 1282.21925533 (17.215992726625572, 11)
loss 383.2935791015625
############ Running episode number: 87  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 1559.714599609375
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 4 19.0 1000.80824137 (11.786394321941378, 11)
loss 667.4605102539062
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 595.4178466796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 962.3623657226562
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 0 21.0 974.154538113 (11.336751742492702, 9)
loss 437.6397399902344
Current State,action,reward,Response time,Next State:  (9, 11.336751742492702) 1 22.0 1016.54054685 (11.25610796929319, 8)
loss 2126.654296875
Current State,action,reward,Response time,Next State:  (8, 11.25610796929319) 3 21.0 1028.90440276 (11.027107764209074, 9)
loss 1746.7916259765625
Current State,action,reward,Response time,Next State:  (9, 11.027107764209074) 3 20.0 1000.33221268 (10.995673623987257, 10)
loss 390.5799560546875
Action +2 not possible so Scaled up by 1
Current State,action,reward,Response time,Next State:  (10, 10.995673623987257) 4 19.0 960.915933313 (10.931193889570471, 11)
loss 511.9432067871094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 1161.527099609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 1465.23095703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 2225.603759765625
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 4 19.0 937.12351295 (10.772009508959538, 11)
loss 1462.303466796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 626.2006225585938
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 2 19.0 930.602776506 (10.58735855349979, 11)
loss 549.9127197265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 1198.3447265625
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 2 19.0 925.738299342 (10.553846649940214, 11)
loss 1503.2620849609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 935.9146118164062
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 1 20.0 922.369964659 (10.448897752470936, 10)
loss 765.1546020507812
Current State,action,reward,Response time,Next State:  (10, 10.448897752470936) 0 22.0 931.912703681 (10.433149880183072, 8)
loss 1527.6448974609375
Current State,action,reward,Response time,Next State:  (8, 10.433149880183072) 2 22.0 980.806785952 (10.44185150623065, 8)
loss 391.9049072265625
Current State,action,reward,Response time,Next State:  (8, 10.44185150623065) 1 23.0 981.315350702 (10.370942817486826, 7)
loss 1165.33349609375
Current State,action,reward,Response time,Next State:  (7, 10.370942817486826) 4 21.0 1022.11240377 (10.42733414151318, 9)
loss 1635.9019775390625
Current State,action,reward,Response time,Next State:  (9, 10.42733414151318) 1 22.0 968.937023414 (10.388469398680568, 8)
loss 392.1277770996094
Current State,action,reward,Response time,Next State:  (8, 10.388469398680568) 3 21.0 978.19544437 (10.344006106602812, 9)
loss 382.8421325683594
Current State,action,reward,Response time,Next State:  (9, 10.344006106602812) 3 20.0 964.575212011 (10.319026962956018, 10)
loss 388.02069091796875
Current State,action,reward,Response time,Next State:  (10, 10.319026962956018) 3 19.0 925.023825574 (10.30224719189987, 11)
loss 392.7119140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 1125.99755859375
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 0 21.0 911.223233653 (10.268274366284802, 9)
loss 1635.323486328125
Current State,action,reward,Response time,Next State:  (9, 10.268274366284802) 3 20.0 960.611029141 (10.335411397720526, 10)
loss 1134.173828125
Current State,action,reward,Response time,Next State:  (10, 10.335411397720526) 3 19.0 925.892923039 (10.305649118067803, 11)
loss 384.99066162109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 391.8721923828125
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 2 19.0 909.642131904 (10.276491935146446, 11)
loss 2928.987060546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 745.0758666992188
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 0 21.0 909.046654676 (10.236272697871373, 9)
loss 2451.939697265625
Current State,action,reward,Response time,Next State:  (9, 10.236272697871373) 3 20.0 958.935899728 (10.369891240151098, 10)
loss 1023.0811157226562
Current State,action,reward,Response time,Next State:  (10, 10.369891240151098) 3 19.0 927.721874973 (10.316955310454549, 11)
loss 2039.9439697265625
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 0 21.0 913.272125304 (10.333617326102203, 9)
loss 385.0121765136719
Current State,action,reward,Response time,Next State:  (9, 10.333617326102203) 3 20.0 964.03141062 (10.390165524255663, 10)
loss 1208.3900146484375
Current State,action,reward,Response time,Next State:  (10, 10.390165524255663) 1 21.0 928.797305964 (10.425974763084863, 9)
loss 2023.6251220703125
Current State,action,reward,Response time,Next State:  (9, 10.425974763084863) 1 22.0 968.865866662 (10.546025383098053, 8)
loss 1242.6552734375
Current State,action,reward,Response time,Next State:  (8, 10.546025383098053) 3 21.0 987.40377158 (10.655373370049301, 9)
loss 401.8443298339844
Current State,action,reward,Response time,Next State:  (9, 10.655373370049301) 3 20.0 980.873751654 (10.624473674922116, 10)
loss 2938.285400390625
Current State,action,reward,Response time,Next State:  (10, 10.624473674922116) 3 19.0 941.225969064 (10.771376986314287, 11)
loss 471.5785217285156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 633.3260498046875
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 4 19.0 945.391786838 (11.039747673816453, 11)
loss 1164.1162109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 589.7003173828125
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 0 21.0 963.716106332 (11.670334358779868, 9)
loss 547.36572265625
Current State,action,reward,Response time,Next State:  (9, 11.670334358779868) 3 20.0 1034.00195058 (11.819721938468785, 10)
loss 390.85540771484375
Current State,action,reward,Response time,Next State:  (10, 11.819721938468785) 2 20.0 1004.62682792 (12.19918626616789, 10)
loss 394.8811340332031
Current State,action,reward,Response time,Next State:  (10, 12.19918626616789) 3 19.0 1024.75516863 (12.501496275411796, 11)
loss 1481.888427734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 392.85650634765625
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 2 19.0 1063.96010023 (13.649658108197247, 11)
loss 396.1536560058594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 439.0995788574219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 1537.3055419921875
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 0 21.0 1143.69153516 (15.353965082180355, 9)
loss 1298.0865478515625
Current State,action,reward,Response time,Next State:  (9, 15.353965082180355) 3 20.0 1226.82184023 (15.836943704090487, 10)
loss 1265.5870361328125
Current State,action,reward,Response time,Next State:  (10, 15.836943704090487) 3 19.0 1217.71670884 (16.466876895473597, 11)
loss 445.9191589355469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 964.5629272460938
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 1482.208251953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 1060.152099609375
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 1 20.0 1301.78496219 (17.944480812078613, 10)
loss 3445.435546875
Action +2 not possible so Scaled up by 1
Current State,action,reward,Response time,Next State:  (10, 17.944480812078613) 4 19.0 1329.50910109 (18.385807405229915, 11)
loss 1164.55615234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 1311.915283203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 393.75714111328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 1119.2264404296875
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 0 21.0 1387.23260634 (19.340464848017284, 9)
loss 775.900634765625
Current State,action,reward,Response time,Next State:  (9, 19.340464848017284) 4 19.0 1435.4954296 (19.213467265587269, 11)
loss 386.54119873046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 968.9965209960938
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 1679.846435546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 594.9424438476562
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 2 19.0 1383.93777195 (19.25591252280865, 11)
loss 2032.85693359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 392.0599670410156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 3957.51123046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 384.09381103515625
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 4 19.0 1339.12370397 (17.82724819986867, 11)
loss 1541.85400390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 2324.850341796875
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 0 21.0 1278.56065924 (16.84211602880065, 9)
loss 384.358642578125
Current State,action,reward,Response time,Next State:  (9, 16.84211602880065) 3 20.0 1304.71919827 (16.237094554670044, 10)
loss 603.4711303710938
Current State,action,reward,Response time,Next State:  (10, 16.237094554670044) 3 19.0 1238.94234737 (15.950694610794756, 11)
loss 3006.524658203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 784.73291015625
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 1 20.0 1204.52470225 (15.550833128512703, 10)
loss 510.6441650390625
Current State,action,reward,Response time,Next State:  (10, 15.550833128512703) 2 20.0 1202.54023315 (15.446694946204717, 10)
loss 1534.0587158203125
Action +2 not possible so Scaled up by 1
Current State,action,reward,Response time,Next State:  (10, 15.446694946204717) 4 19.0 1197.01631782 (15.750501603468638, 11)
loss 1587.376708984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 630.8040771484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 477.4214782714844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 382.9267578125
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 1 20.0 1207.88915169 (15.954793861767499, 10)
loss 1250.0638427734375
Current State,action,reward,Response time,Next State:  (10, 15.954793861767499) 2 20.0 1223.96796344 (16.004586266677634, 10)
loss 391.3407897949219
Action +2 not possible so Scaled up by 1
Current State,action,reward,Response time,Next State:  (10, 16.004586266677634) 4 19.0 1226.60915635 (16.017694914042416, 11)
loss 385.8251953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 389.9434509277344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 1347.5985107421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 578.1185913085938
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 1 20.0 1221.34827555 (16.229253414601111, 10)
loss 546.0309448242188
Current State,action,reward,Response time,Next State:  (10, 16.229253414601111) 0 22.0 1238.52642122 (16.295120821876548, 8)
loss 1011.19775390625
Current State,action,reward,Response time,Next State:  (8, 16.295120821876548) 1 23.0 1323.40847593 (16.667936385136993, 7)
loss 383.1298828125
Current State,action,reward,Response time,Next State:  (7, 16.667936385136993) 3 22.0 1414.52045804 (16.836383524612351, 8)
loss 1825.6917724609375
Current State,action,reward,Response time,Next State:  (8, 16.836383524612351) 4 20.0 1355.04246364 (16.845818065953559, 10)
loss 645.6359252929688
Current State,action,reward,Response time,Next State:  (10, 16.845818065953559) 0 22.0 1271.23153331 (17.052961248403161, 8)
loss 394.630126953125
Current State,action,reward,Response time,Next State:  (8, 17.052961248403161) 3 21.0 1367.70030431 (17.215992726625572, 9)
loss 590.4633178710938
############ Running episode number: 88  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 387.8698425292969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 395.62542724609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 398.9277038574219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 2569.547607421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 580.4226684570312
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 391.85638427734375
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 2 19.0 962.898956888 (11.027107764209074, 11)
loss 387.7351379394531
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 2 19.0 950.798097136 (10.995673623987257, 11)
loss 1626.7532958984375
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 2 19.0 949.137050055 (10.931193889570471, 11)
loss 445.76470947265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 1197.179931640625
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 2 19.0 939.691239608 (10.819208572963639, 11)
loss 505.1320495605469
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 2 19.0 939.812260006 (10.768325938188134, 11)
loss 1594.6656494140625
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 2 19.0 937.12351295 (10.772009508959538, 11)
loss 581.0897827148438
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 600.8638916015625
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 4 19.0 930.602776506 (10.58735855349979, 11)
loss 385.880615234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 396.2134094238281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 385.75494384765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 1306.1505126953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 851.1614379882812
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 1348.93310546875
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 0 21.0 919.412094444 (10.44185150623065, 9)
loss 403.0623779296875
Current State,action,reward,Response time,Next State:  (9, 10.44185150623065) 3 20.0 969.696935814 (10.370942817486826, 10)
loss 393.89837646484375
Current State,action,reward,Response time,Next State:  (10, 10.370942817486826) 1 21.0 927.777654938 (10.42733414151318, 9)
loss 387.5963439941406
Current State,action,reward,Response time,Next State:  (9, 10.42733414151318) 3 20.0 968.937023414 (10.388469398680568, 10)
loss 594.9971923828125
Current State,action,reward,Response time,Next State:  (10, 10.388469398680568) 3 19.0 928.707336523 (10.344006106602812, 11)
loss 493.8949890136719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 597.4373779296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 539.037109375
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 1 20.0 912.494916918 (10.278181486298042, 10)
loss 384.72650146484375
Current State,action,reward,Response time,Next State:  (10, 10.278181486298042) 3 19.0 922.857214352 (10.268274366284802, 11)
loss 385.84832763671875
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 4 19.0 910.69972028 (10.335411397720526, 11)
loss 1191.4742431640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 592.4283447265625
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 4 19.0 912.67468196 (10.24826025489064, 11)
loss 529.0460815429688
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 2626.889404296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 1195.2501220703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 2603.919189453125
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 0 21.0 909.008683798 (10.369891240151098, 9)
loss 545.7510375976562
Current State,action,reward,Response time,Next State:  (9, 10.369891240151098) 2 21.0 965.930171009 (10.316955310454549, 9)
loss 479.003662109375
Current State,action,reward,Response time,Next State:  (9, 10.316955310454549) 3 20.0 963.159236328 (10.333617326102203, 10)
loss 380.2575378417969
Current State,action,reward,Response time,Next State:  (10, 10.333617326102203) 3 19.0 925.797758139 (10.390165524255663, 11)
loss 1723.046630859375
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 1 20.0 917.140709305 (10.425974763084863, 10)
loss 665.0943603515625
Current State,action,reward,Response time,Next State:  (10, 10.425974763084863) 3 19.0 930.696774523 (10.546025383098053, 11)
loss 654.13525390625
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 0 21.0 925.376677007 (10.655373370049301, 9)
loss 582.414794921875
Current State,action,reward,Response time,Next State:  (9, 10.655373370049301) 4 19.0 980.873751654 (10.624473674922116, 11)
loss 593.7557983398438
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 1926.6737060546875
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 0 21.0 937.284736847 (10.924797168745895, 9)
loss 516.6751708984375
Current State,action,reward,Response time,Next State:  (9, 10.924797168745895) 3 20.0 994.97675791 (11.039747673816453, 10)
loss 391.0841064453125
Current State,action,reward,Response time,Next State:  (10, 11.039747673816453) 2 20.0 963.253801267 (11.271571944085663, 10)
loss 476.5870361328125
Current State,action,reward,Response time,Next State:  (10, 11.271571944085663) 3 19.0 975.550709187 (11.670334358779868, 11)
loss 1020.9495239257812
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 1930.38330078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 1960.3289794921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 3277.238525390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 384.8299865722656
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 0 21.0 1063.96010023 (13.649658108197247, 9)
loss 384.88641357421875
Current State,action,reward,Response time,Next State:  (9, 13.649658108197247) 1 22.0 1137.6097809 (14.283719188889453, 8)
loss 590.7181396484375
Current State,action,reward,Response time,Next State:  (8, 14.283719188889453) 3 21.0 1205.85251983 (14.677479537099185, 9)
loss 391.8069763183594
Current State,action,reward,Response time,Next State:  (9, 14.677479537099185) 3 20.0 1191.41116041 (15.353965082180355, 10)
loss 1498.31494140625
Current State,action,reward,Response time,Next State:  (10, 15.353965082180355) 2 20.0 1192.09754638 (15.836943704090487, 10)
loss 2165.92529296875
Current State,action,reward,Response time,Next State:  (10, 15.836943704090487) 1 21.0 1217.71670884 (16.466876895473597, 9)
loss 1514.7196044921875
Current State,action,reward,Response time,Next State:  (9, 16.466876895473597) 3 20.0 1285.07728144 (16.871606159345866, 10)
loss 2793.693359375
Current State,action,reward,Response time,Next State:  (10, 16.871606159345866) 3 19.0 1272.5994393 (17.534967586021782, 11)
loss 385.3439636230469
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 4 19.0 1294.6873044 (17.669285735563751, 11)
loss 385.0238952636719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 1482.97802734375
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 2 19.0 1316.32685758 (18.385807405229915, 11)
loss 391.3067321777344
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 2 19.0 1339.64749699 (18.671267839956315, 11)
loss 390.6368713378906
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 4 19.0 1354.73183582 (19.02839494033929, 11)
loss 1011.0694580078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 2668.947265625
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 1 20.0 1387.23260634 (19.340464848017284, 10)
loss 1007.620849609375
Current State,action,reward,Response time,Next State:  (10, 19.340464848017284) 3 19.0 1403.55780672 (19.213467265587269, 11)
loss 1486.707275390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 1512.6455078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 379.0947570800781
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 0 21.0 1392.48057747 (19.223969507401588, 9)
loss 1197.829345703125
Current State,action,reward,Response time,Next State:  (9, 19.223969507401588) 3 20.0 1429.39747342 (19.25591252280865, 10)
loss 587.6947021484375
Current State,action,reward,Response time,Next State:  (10, 19.25591252280865) 3 19.0 1399.0728054 (19.08360399753829, 11)
loss 1744.70849609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 1333.488037109375
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 2 19.0 1354.56874896 (18.375894992990247, 11)
loss 594.830078125
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 0 21.0 1339.12370397 (17.82724819986867, 9)
loss 385.4877014160156
Current State,action,reward,Response time,Next State:  (9, 17.82724819986867) 3 20.0 1356.28600579 (17.229782241685768, 10)
loss 384.91741943359375
Action +2 not possible so Scaled up by 1
Current State,action,reward,Response time,Next State:  (10, 17.229782241685768) 4 19.0 1291.59856437 (16.84211602880065, 11)
loss 1019.4992065429688
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 1 20.0 1258.07554888 (16.237094554670044, 10)
loss 383.3288269042969
Action +2 not possible so Scaled up by 1
Current State,action,reward,Response time,Next State:  (10, 16.237094554670044) 4 19.0 1238.94234737 (15.950694610794756, 11)
loss 581.9789428710938
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 1417.37353515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 2483.249755859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 2511.13671875
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 4 19.0 1184.33851965 (15.750501603468638, 11)
loss 1477.3153076171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 699.8951416015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 382.32147216796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 395.61474609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 1014.6226806640625
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 0 21.0 1211.18755114 (16.004586266677634, 9)
loss 395.0212097167969
Current State,action,reward,Response time,Next State:  (9, 16.004586266677634) 3 20.0 1260.87864843 (16.017694914042416, 10)
loss 1859.0682373046875
Current State,action,reward,Response time,Next State:  (10, 16.017694914042416) 3 19.0 1227.30449265 (15.947547279389703, 11)
loss 2333.5048828125
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 1 20.0 1210.80462626 (16.11465619633363, 10)
loss 1213.94287109375
Current State,action,reward,Response time,Next State:  (10, 16.11465619633363) 3 19.0 1232.44771583 (16.147078378791146, 11)
loss 2314.95068359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 530.1834716796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 391.315673828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 382.6026306152344
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 1 20.0 1248.87152463 (16.836383524612351, 10)
loss 1203.4366455078125
Current State,action,reward,Response time,Next State:  (10, 16.836383524612351) 0 22.0 1270.73108663 (16.845818065953559, 8)
loss 1546.1124267578125
Current State,action,reward,Response time,Next State:  (8, 16.845818065953559) 3 21.0 1355.59386347 (17.052961248403161, 9)
loss 1599.627197265625
Current State,action,reward,Response time,Next State:  (9, 17.052961248403161) 3 20.0 1315.75590499 (17.215992726625572, 10)
loss 1395.6580810546875
############ Running episode number: 89  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 637.0255737304688
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 1 20.0 1000.80824137 (11.786394321941378, 10)
loss 439.7862243652344
Current State,action,reward,Response time,Next State:  (10, 11.786394321941378) 3 19.0 1002.85899476 (11.61852219546234, 11)
loss 386.476806640625
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 1 20.0 982.049698353 (11.469111876584304, 10)
loss 2634.42578125
Current State,action,reward,Response time,Next State:  (10, 11.469111876584304) 3 19.0 986.02903554 (11.336751742492702, 11)
loss 390.7718505859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 1475.842041015625
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 1 20.0 962.898956888 (11.027107764209074, 10)
loss 387.80987548828125
Current State,action,reward,Response time,Next State:  (10, 11.027107764209074) 3 19.0 962.583328739 (10.995673623987257, 11)
loss 391.2529296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 860.18359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 583.0634765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 1225.5941162109375
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 0 21.0 939.812260006 (10.768325938188134, 9)
loss 385.75262451171875
Current State,action,reward,Response time,Next State:  (9, 10.768325938188134) 3 20.0 986.786261176 (10.772009508959538, 10)
loss 1629.990234375
Current State,action,reward,Response time,Next State:  (10, 10.772009508959538) 3 19.0 949.051873418 (10.644925616761762, 11)
loss 391.55194091796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 1677.0306396484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 1389.4183349609375
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 2 19.0 925.738299342 (10.553846649940214, 11)
loss 389.6526794433594
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 2 19.0 925.789969445 (10.489125480251131, 11)
loss 1008.8003540039062
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 4 19.0 922.369964659 (10.448897752470936, 11)
loss 1018.349853515625
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 4 19.0 920.244245637 (10.433149880183072, 11)
loss 484.57220458984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 1726.541015625
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 0 21.0 919.871906942 (10.370942817486826, 9)
loss 651.2318725585938
Current State,action,reward,Response time,Next State:  (9, 10.370942817486826) 2 21.0 965.985215893 (10.42733414151318, 9)
loss 587.8267211914062
Current State,action,reward,Response time,Next State:  (9, 10.42733414151318) 1 22.0 968.937023414 (10.388469398680568, 8)
loss 1707.052001953125
Current State,action,reward,Response time,Next State:  (8, 10.388469398680568) 3 21.0 978.19544437 (10.344006106602812, 9)
loss 1677.722900390625
Current State,action,reward,Response time,Next State:  (9, 10.344006106602812) 1 22.0 964.575212011 (10.319026962956018, 8)
loss 400.53619384765625
Current State,action,reward,Response time,Next State:  (8, 10.319026962956018) 0 24.0 974.136895449 (10.30224719189987, 6)
loss 1006.0071411132812
Current State,action,reward,Response time,Next State:  (6, 10.30224719189987) 3 23.0 1078.78028906 (10.278181486298042, 7)
loss 386.5589294433594
Current State,action,reward,Response time,Next State:  (7, 10.278181486298042) 3 22.0 1016.33182085 (10.268274366284802, 8)
loss 1471.623046875
Current State,action,reward,Response time,Next State:  (8, 10.268274366284802) 2 22.0 971.170670341 (10.335411397720526, 8)
loss 690.7198486328125
Current State,action,reward,Response time,Next State:  (8, 10.335411397720526) 3 21.0 975.09448038 (10.305649118067803, 9)
loss 1828.87939453125
Current State,action,reward,Response time,Next State:  (9, 10.305649118067803) 3 20.0 962.567412952 (10.24826025489064, 10)
loss 657.5263061523438
Current State,action,reward,Response time,Next State:  (10, 10.24826025489064) 0 22.0 921.2700698 (10.276491935146446, 8)
loss 398.2113037109375
Current State,action,reward,Response time,Next State:  (8, 10.276491935146446) 3 21.0 971.650944469 (10.236991269871366, 9)
loss 447.7319030761719
Current State,action,reward,Response time,Next State:  (9, 10.236991269871366) 4 19.0 958.973513426 (10.236272697871373, 11)
loss 1020.2670288085938
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 1 20.0 909.008683798 (10.369891240151098, 10)
loss 790.963623046875
Current State,action,reward,Response time,Next State:  (10, 10.369891240151098) 3 19.0 927.721874973 (10.316955310454549, 11)
loss 399.421630859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 2776.787353515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 671.0908203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 606.6180419921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 2457.81396484375
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 4 19.0 925.376677007 (10.655373370049301, 11)
loss 643.7708129882812
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 1535.4805908203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 387.8538513183594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 450.0086364746094
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 2 19.0 945.391786838 (11.039747673816453, 11)
loss 527.915283203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 579.3140869140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 2598.242431640625
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 4 19.0 984.787563682 (11.819721938468785, 11)
loss 1715.99560546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 1922.5037841796875
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 2 19.0 1012.73322757 (12.501496275411796, 11)
loss 2719.2734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 1381.105712890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 1830.8087158203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 867.1687622070312
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 2217.99267578125
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 2 19.0 1143.69153516 (15.353965082180355, 11)
loss 1482.5889892578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 743.8585205078125
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 0 21.0 1204.9600972 (16.466876895473597, 9)
loss 1724.3822021484375
Current State,action,reward,Response time,Next State:  (9, 16.466876895473597) 3 20.0 1285.07728144 (16.871606159345866, 10)
loss 384.1687927246094
Action +2 not possible so Scaled up by 1
Current State,action,reward,Response time,Next State:  (10, 16.871606159345866) 4 19.0 1272.5994393 (17.534967586021782, 11)
loss 503.3786926269531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 1681.4345703125
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 1 20.0 1301.78496219 (17.944480812078613, 10)
loss 1472.5255126953125
Current State,action,reward,Response time,Next State:  (10, 17.944480812078613) 3 19.0 1329.50910109 (18.385807405229915, 11)
loss 598.1605834960938
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 2 19.0 1339.64749699 (18.671267839956315, 11)
loss 1450.0384521484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 1022.870361328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 1385.324462890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 438.74041748046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 1009.3609008789062
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 1018.0118408203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 590.2403564453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 387.8513488769531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 1564.93310546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 584.8298950195312
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 387.291748046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 394.091552734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 382.90869140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 389.2855224609375
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 1 20.0 1278.56065924 (16.84211602880065, 10)
loss 394.1627502441406
Current State,action,reward,Response time,Next State:  (10, 16.84211602880065) 3 19.0 1271.03516211 (16.237094554670044, 11)
loss 1543.2188720703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 581.2178344726562
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 0 21.0 1210.97093797 (15.828704162850809, 9)
loss 675.87060546875
Current State,action,reward,Response time,Next State:  (9, 15.828704162850809) 3 20.0 1251.67208827 (15.550833128512703, 10)
loss 541.887451171875
Current State,action,reward,Response time,Next State:  (10, 15.550833128512703) 3 19.0 1202.54023315 (15.446694946204717, 11)
loss 1545.4991455078125
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 0 21.0 1184.33851965 (15.750501603468638, 9)
loss 392.7054138183594
Current State,action,reward,Response time,Next State:  (9, 15.750501603468638) 3 20.0 1247.57857022 (15.817158911312735, 10)
loss 1192.019287109375
Current State,action,reward,Response time,Next State:  (10, 15.817158911312735) 3 19.0 1216.66724247 (15.829956988360925, 11)
loss 1831.63623046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 2406.826904296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 393.8905944824219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 1472.064453125
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 4 19.0 1213.81868812 (16.017694914042416, 11)
loss 483.58612060546875
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 1 20.0 1214.51137704 (15.947547279389703, 10)
loss 1336.091552734375
Current State,action,reward,Response time,Next State:  (10, 15.947547279389703) 1 21.0 1223.58357506 (16.11465619633363, 9)
loss 1543.376953125
Current State,action,reward,Response time,Next State:  (9, 16.11465619633363) 3 20.0 1266.64026605 (16.147078378791146, 10)
loss 497.8669738769531
Current State,action,reward,Response time,Next State:  (10, 16.147078378791146) 3 19.0 1234.16752106 (16.229253414601111, 11)
loss 1515.6922607421875
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 2 19.0 1225.69057988 (16.295120821876548, 11)
loss 1277.287841796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 1544.956298828125
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 1 20.0 1248.87152463 (16.836383524612351, 10)
loss 709.1851196289062
Action +2 not possible so Scaled up by 1
Current State,action,reward,Response time,Next State:  (10, 16.836383524612351) 4 19.0 1270.73108663 (16.845818065953559, 11)
loss 1547.2652587890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 395.34356689453125
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 4 19.0 1269.21706044 (17.215992726625572, 11)
loss 1512.913330078125
############ Running episode number: 90  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 1541.123779296875
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 0 21.0 1000.80824137 (11.786394321941378, 9)
loss 2910.218017578125
Current State,action,reward,Response time,Next State:  (9, 11.786394321941378) 3 20.0 1040.0771169 (11.61852219546234, 10)
loss 565.4719848632812
Current State,action,reward,Response time,Next State:  (10, 11.61852219546234) 3 19.0 993.95437024 (11.469111876584304, 11)
loss 385.90576171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 376.5924072265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 1192.0933837890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 544.0902709960938
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 583.1469116210938
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 2135.870361328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 388.7261657714844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 1454.97705078125
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 0 21.0 939.812260006 (10.768325938188134, 9)
loss 595.3648071289062
Current State,action,reward,Response time,Next State:  (9, 10.768325938188134) 0 23.0 986.786261176 (10.772009508959538, 7)
loss 728.80615234375
Current State,action,reward,Response time,Next State:  (7, 10.772009508959538) 1 24.0 1047.1055689 (10.644925616761762, 6)
loss 388.8622741699219
Current State,action,reward,Response time,Next State:  (6, 10.644925616761762) 4 22.0 1101.44473373 (10.58735855349979, 8)
loss 1739.3148193359375
Current State,action,reward,Response time,Next State:  (8, 10.58735855349979) 3 21.0 989.819480251 (10.552868829802469, 9)
loss 1466.63818359375
Current State,action,reward,Response time,Next State:  (9, 10.552868829802469) 3 20.0 975.508144832 (10.553846649940214, 10)
loss 400.02593994140625
Current State,action,reward,Response time,Next State:  (10, 10.553846649940214) 2 20.0 937.479622653 (10.489125480251131, 10)
loss 1391.67822265625
Current State,action,reward,Response time,Next State:  (10, 10.489125480251131) 0 22.0 934.046546974 (10.448897752470936, 8)
loss 592.3326416015625
Current State,action,reward,Response time,Next State:  (8, 10.448897752470936) 3 21.0 981.727167119 (10.433149880183072, 9)
loss 389.9066467285156
Current State,action,reward,Response time,Next State:  (9, 10.433149880183072) 3 20.0 969.241448633 (10.44185150623065, 10)
loss 392.0152587890625
Current State,action,reward,Response time,Next State:  (10, 10.44185150623065) 3 19.0 931.538941947 (10.370942817486826, 11)
loss 599.92431640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 389.10650634765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 1621.35107421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 1630.298095703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 389.98663330078125
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 2 19.0 913.381595845 (10.30224719189987, 11)
loss 392.8675537109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 387.9037780761719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 388.3298645019531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 635.3292236328125
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 1 20.0 914.247384359 (10.305649118067803, 10)
loss 1475.77880859375
Current State,action,reward,Response time,Next State:  (10, 10.305649118067803) 3 19.0 924.314209939 (10.24826025489064, 11)
loss 554.6651611328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 1523.679931640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 386.62176513671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 2327.9453125
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 4 19.0 909.008683798 (10.369891240151098, 11)
loss 1627.74658203125
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 1 20.0 916.069372847 (10.316955310454549, 10)
loss 1188.317626953125
Action +2 not possible so Scaled up by 1
Current State,action,reward,Response time,Next State:  (10, 10.316955310454549) 4 19.0 924.913936648 (10.333617326102203, 11)
loss 394.0534973144531
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 2 19.0 914.152581784 (10.390165524255663, 11)
loss 2324.915771484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 1190.3245849609375
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 2 19.0 919.032945938 (10.546025383098053, 11)
loss 394.4857177734375
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 4 19.0 925.376677007 (10.655373370049301, 11)
loss 479.9947814941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 545.2662963867188
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 392.20196533203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 387.03704833984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 382.0761413574219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 584.6640625
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 1 20.0 963.716106332 (11.670334358779868, 10)
loss 391.1733093261719
Current State,action,reward,Response time,Next State:  (10, 11.670334358779868) 3 19.0 996.702699398 (11.819721938468785, 11)
loss 590.2348022460938
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 384.3028869628906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 389.58111572265625
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 1 20.0 1028.70793389 (13.168618569876575, 10)
loss 1396.916015625
Current State,action,reward,Response time,Next State:  (10, 13.168618569876575) 0 22.0 1076.17782493 (13.649658108197247, 8)
loss 2101.87939453125
Current State,action,reward,Response time,Next State:  (8, 13.649658108197247) 3 21.0 1168.79494995 (14.283719188889453, 9)
loss 1189.6695556640625
Current State,action,reward,Response time,Next State:  (9, 14.283719188889453) 0 23.0 1170.79974938 (14.677479537099185, 7)
loss 387.28375244140625
Current State,action,reward,Response time,Next State:  (7, 14.677479537099185) 3 22.0 1290.48169407 (15.353965082180355, 8)
loss 401.1547546386719
Current State,action,reward,Response time,Next State:  (8, 15.353965082180355) 3 21.0 1268.40282167 (15.836943704090487, 9)
loss 393.8517150878906
Current State,action,reward,Response time,Next State:  (9, 15.836943704090487) 3 20.0 1252.10338759 (16.466876895473597, 10)
loss 390.39556884765625
Current State,action,reward,Response time,Next State:  (10, 16.466876895473597) 1 21.0 1251.130943 (16.871606159345866, 9)
loss 1626.8873291015625
Current State,action,reward,Response time,Next State:  (9, 16.871606159345866) 3 20.0 1306.26286107 (17.534967586021782, 10)
loss 1533.680908203125
Current State,action,reward,Response time,Next State:  (10, 17.534967586021782) 1 21.0 1307.78684385 (17.669285735563751, 9)
loss 1199.44677734375
Current State,action,reward,Response time,Next State:  (9, 17.669285735563751) 3 20.0 1348.01745033 (17.944480812078613, 10)
loss 1666.3873291015625
Current State,action,reward,Response time,Next State:  (10, 17.944480812078613) 3 19.0 1329.50910109 (18.385807405229915, 11)
loss 444.3263244628906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 389.70867919921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 1581.0030517578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 1555.6541748046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 584.806640625
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 1 20.0 1390.09363446 (19.213467265587269, 10)
loss 581.1542358398438
Current State,action,reward,Response time,Next State:  (10, 19.213467265587269) 3 19.0 1396.82133527 (19.140765783401285, 11)
loss 2343.62841796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 430.6750183105469
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 4 19.0 1392.48057747 (19.223969507401588, 11)
loss 393.89373779296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 1111.47802734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 400.8531799316406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 394.6690368652344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 588.3677368164062
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 1 20.0 1339.12370397 (17.82724819986867, 10)
loss 520.5067138671875
Current State,action,reward,Response time,Next State:  (10, 17.82724819986867) 3 19.0 1323.29060362 (17.229782241685768, 11)
loss 544.2546997070312
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 2276.79345703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 378.3900451660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 1280.9046630859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 1007.1986083984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 3097.389892578125
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 2 19.0 1189.84140354 (15.446694946204717, 11)
loss 1141.825439453125
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 2 19.0 1184.33851965 (15.750501603468638, 11)
loss 1231.0931396484375
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 2 19.0 1200.39231205 (15.817158911312735, 11)
loss 507.0558166503906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 1708.30712890625
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 0 21.0 1204.59090422 (15.892373986997768, 9)
loss 1472.4278564453125
Current State,action,reward,Response time,Next State:  (9, 15.892373986997768) 3 20.0 1255.00488935 (15.954793861767499, 10)
loss 1517.5361328125
Current State,action,reward,Response time,Next State:  (10, 15.954793861767499) 3 19.0 1223.96796344 (16.004586266677634, 11)
loss 512.8536987304688
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 392.0830993652344
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 1 20.0 1214.51137704 (15.947547279389703, 10)
loss 3334.736083984375
Action +2 not possible so Scaled up by 1
Current State,action,reward,Response time,Next State:  (10, 15.947547279389703) 4 19.0 1223.58357506 (16.11465619633363, 11)
loss 533.4676513671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 1018.6044311523438
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 1516.3670654296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 1508.95849609375
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 0 21.0 1229.17115431 (16.667936385136993, 9)
loss 1541.85302734375
Current State,action,reward,Response time,Next State:  (9, 16.667936385136993) 3 20.0 1295.6017535 (16.836383524612351, 10)
loss 1510.47998046875
Current State,action,reward,Response time,Next State:  (10, 16.836383524612351) 3 19.0 1270.73108663 (16.845818065953559, 11)
loss 385.2940673828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 2262.489501953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 1008.7454833984375
############ Running episode number: 91  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 388.0020751953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 835.1599731445312
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 471.6697692871094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 393.3070068359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 438.241455078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 1475.3447265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 1008.3256225585938
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 377.16046142578125
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 4 19.0 949.137050055 (10.931193889570471, 11)
loss 2605.120849609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 388.13427734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 400.22076416015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 1552.4967041015625
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 1 20.0 937.12351295 (10.772009508959538, 10)
loss 1067.4259033203125
Current State,action,reward,Response time,Next State:  (10, 10.772009508959538) 3 19.0 949.051873418 (10.644925616761762, 11)
loss 1531.3868408203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 391.13348388671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 1501.287841796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 588.1271362304688
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 379.7470397949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 581.1393432617188
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 577.63720703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 1778.0821533203125
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 0 21.0 919.871906942 (10.370942817486826, 9)
loss 391.4241638183594
Current State,action,reward,Response time,Next State:  (9, 10.370942817486826) 3 20.0 965.985215893 (10.42733414151318, 10)
loss 594.181396484375
Current State,action,reward,Response time,Next State:  (10, 10.42733414151318) 3 19.0 930.768881517 (10.388469398680568, 11)
loss 1196.73974609375
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 0 21.0 917.051082408 (10.344006106602812, 9)
loss 741.1326904296875
Current State,action,reward,Response time,Next State:  (9, 10.344006106602812) 3 20.0 964.575212011 (10.319026962956018, 10)
loss 514.2246704101562
Current State,action,reward,Response time,Next State:  (10, 10.319026962956018) 3 19.0 925.023825574 (10.30224719189987, 11)
loss 1712.64892578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 1536.9630126953125
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 0 21.0 911.223233653 (10.268274366284802, 9)
loss 382.2399597167969
Current State,action,reward,Response time,Next State:  (9, 10.268274366284802) 1 22.0 960.611029141 (10.335411397720526, 8)
loss 1186.7867431640625
Current State,action,reward,Response time,Next State:  (8, 10.335411397720526) 2 22.0 975.09448038 (10.305649118067803, 8)
loss 542.4360961914062
Current State,action,reward,Response time,Next State:  (8, 10.305649118067803) 3 21.0 973.355030047 (10.24826025489064, 9)
loss 389.26220703125
Current State,action,reward,Response time,Next State:  (9, 10.24826025489064) 2 21.0 959.563389179 (10.276491935146446, 9)
loss 593.69775390625
Current State,action,reward,Response time,Next State:  (9, 10.276491935146446) 3 20.0 961.041178317 (10.236991269871366, 10)
loss 397.6455993652344
Current State,action,reward,Response time,Next State:  (10, 10.236991269871366) 0 22.0 920.672316722 (10.236272697871373, 8)
loss 1710.5267333984375
Current State,action,reward,Response time,Next State:  (8, 10.236272697871373) 3 21.0 969.300339391 (10.369891240151098, 9)
loss 1006.0086669921875
Current State,action,reward,Response time,Next State:  (9, 10.369891240151098) 3 20.0 965.930171009 (10.316955310454549, 10)
loss 928.70751953125
Current State,action,reward,Response time,Next State:  (10, 10.316955310454549) 3 19.0 924.913936648 (10.333617326102203, 11)
loss 2604.25439453125
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 4 19.0 914.152581784 (10.390165524255663, 11)
loss 391.3836975097656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 2514.965576171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 385.4234619140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 393.4764099121094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 378.2251281738281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 637.6920166015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 378.1842346191406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 476.3286437988281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 2620.051513671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 473.60784912109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 387.62640380859375
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 1 20.0 992.681522335 (12.19918626616789, 10)
loss 633.6157836914062
Current State,action,reward,Response time,Next State:  (10, 12.19918626616789) 3 19.0 1024.75516863 (12.501496275411796, 11)
loss 1245.5997314453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 384.782470703125
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 0 21.0 1063.96010023 (13.649658108197247, 9)
loss 580.5537719726562
Current State,action,reward,Response time,Next State:  (9, 13.649658108197247) 1 22.0 1137.6097809 (14.283719188889453, 8)
loss 2620.328125
Current State,action,reward,Response time,Next State:  (8, 14.283719188889453) 3 21.0 1205.85251983 (14.677479537099185, 9)
loss 2602.896728515625
Current State,action,reward,Response time,Next State:  (9, 14.677479537099185) 4 19.0 1191.41116041 (15.353965082180355, 11)
loss 1537.864013671875
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 0 21.0 1179.43847566 (15.836943704090487, 9)
loss 501.674072265625
Current State,action,reward,Response time,Next State:  (9, 15.836943704090487) 3 20.0 1252.10338759 (16.466876895473597, 10)
loss 2521.324462890625
Current State,action,reward,Response time,Next State:  (10, 16.466876895473597) 0 22.0 1251.130943 (16.871606159345866, 8)
loss 1513.79931640625
Current State,action,reward,Response time,Next State:  (8, 16.871606159345866) 3 21.0 1357.1010433 (17.534967586021782, 9)
loss 2093.24951171875
Current State,action,reward,Response time,Next State:  (9, 17.534967586021782) 1 22.0 1340.98655806 (17.669285735563751, 8)
loss 702.7041625976562
Current State,action,reward,Response time,Next State:  (8, 17.669285735563751) 3 21.0 1403.72126261 (17.944480812078613, 9)
loss 439.2771301269531
Current State,action,reward,Response time,Next State:  (9, 17.944480812078613) 3 20.0 1362.4225545 (18.385807405229915, 10)
loss 503.68701171875
Current State,action,reward,Response time,Next State:  (10, 18.385807405229915) 3 19.0 1352.9188695 (18.671267839956315, 11)
loss 388.7406921386719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 1831.6832275390625
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 0 21.0 1373.60319427 (19.286321916040979, 9)
loss 395.45660400390625
Current State,action,reward,Response time,Next State:  (9, 19.286321916040979) 1 22.0 1432.66131431 (19.340464848017284, 8)
loss 393.32537841796875
Current State,action,reward,Response time,Next State:  (8, 19.340464848017284) 4 20.0 1501.39298325 (19.213467265587269, 10)
loss 394.18267822265625
Current State,action,reward,Response time,Next State:  (10, 19.213467265587269) 3 19.0 1396.82133527 (19.140765783401285, 11)
loss 389.464111328125
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 4 19.0 1379.54110953 (19.385636054792762, 11)
loss 384.86309814453125
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 1 20.0 1392.48057747 (19.223969507401588, 10)
loss 1471.89599609375
Current State,action,reward,Response time,Next State:  (10, 19.223969507401588) 3 19.0 1397.37841716 (19.25591252280865, 11)
loss 1558.42138671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 389.2147521972656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 444.1541442871094
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 1 20.0 1354.56874896 (18.375894992990247, 10)
loss 387.7829895019531
Current State,action,reward,Response time,Next State:  (10, 18.375894992990247) 0 22.0 1352.39307459 (17.82724819986867, 8)
loss 593.1423950195312
Current State,action,reward,Response time,Next State:  (8, 17.82724819986867) 3 21.0 1412.95334646 (17.229782241685768, 9)
loss 383.57928466796875
Current State,action,reward,Response time,Next State:  (9, 17.229782241685768) 3 20.0 1325.01161138 (16.84211602880065, 10)
loss 591.9777221679688
Current State,action,reward,Response time,Next State:  (10, 16.84211602880065) 3 19.0 1271.03516211 (16.237094554670044, 11)
loss 1185.642822265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 1403.0028076171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 592.3424072265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 1555.052490234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 395.3466491699219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 1463.2027587890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 435.1572570800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 390.8753967285156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 2133.15478515625
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 0 21.0 1207.88915169 (15.954793861767499, 9)
loss 1012.93603515625
Current State,action,reward,Response time,Next State:  (9, 15.954793861767499) 3 20.0 1258.27226176 (16.004586266677634, 10)
loss 578.9120483398438
Current State,action,reward,Response time,Next State:  (10, 16.004586266677634) 3 19.0 1226.60915635 (16.017694914042416, 11)
loss 704.6707763671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 1193.98095703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 474.3486022949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 1202.0330810546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 471.7835388183594
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 1 20.0 1225.69057988 (16.295120821876548, 10)
loss 3159.87255859375
Current State,action,reward,Response time,Next State:  (10, 16.295120821876548) 3 19.0 1242.02029803 (16.667936385136993, 11)
loss 388.5289306640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 566.3920288085938
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 380.94256591796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 434.1628112792969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 2655.3779296875
############ Running episode number: 92  ##############
Action +2 not possible so Scaled up by 1
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 4 19.0 1029.06659875 (11.973514343585284, 11)
loss 392.9792175292969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 538.4139404296875
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 1 20.0 990.920419923 (11.61852219546234, 10)
loss 376.770751953125
Current State,action,reward,Response time,Next State:  (10, 11.61852219546234) 3 19.0 993.95437024 (11.469111876584304, 11)
loss 387.2662353515625
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 0 21.0 974.154538113 (11.336751742492702, 9)
loss 593.2566528320312
Current State,action,reward,Response time,Next State:  (9, 11.336751742492702) 0 23.0 1016.54054685 (11.25610796929319, 7)
loss 1261.4478759765625
Current State,action,reward,Response time,Next State:  (7, 11.25610796929319) 0 25.0 1077.27300243 (11.027107764209074, 5)
loss 700.983642578125
Current State,action,reward,Response time,Next State:  (5, 11.027107764209074) 3 24.0 1270.75995258 (10.995673623987257, 6)
loss 590.2467651367188
Current State,action,reward,Response time,Next State:  (6, 10.995673623987257) 0 26.0 1124.64289336 (10.931193889570471, 4)
loss 539.5734252929688
Current State,action,reward,Response time,Next State:  (4, 10.931193889570471) 3 25.0 1426.9014838 (10.816918347608043, 5)
loss 1557.2733154296875
Current State,action,reward,Response time,Next State:  (5, 10.816918347608043) 2 25.0 1251.44728215 (10.819208572963639, 5)
loss 1717.1417236328125
Current State,action,reward,Response time,Next State:  (5, 10.819208572963639) 3 24.0 1251.65771315 (10.768325938188134, 6)
loss 396.86724853515625
Current State,action,reward,Response time,Next State:  (6, 10.768325938188134) 2 24.0 1109.60632067 (10.772009508959538, 6)
loss 385.4454650878906
Current State,action,reward,Response time,Next State:  (6, 10.772009508959538) 3 23.0 1109.84994875 (10.644925616761762, 7)
loss 545.1470947265625
Current State,action,reward,Response time,Next State:  (7, 10.644925616761762) 1 24.0 1039.18611617 (10.58735855349979, 6)
loss 1520.83642578125
Current State,action,reward,Response time,Next State:  (6, 10.58735855349979) 3 23.0 1097.63729963 (10.552868829802469, 7)
loss 446.3539123535156
Current State,action,reward,Response time,Next State:  (7, 10.552868829802469) 3 22.0 1033.44943815 (10.553846649940214, 8)
loss 1678.105712890625
Current State,action,reward,Response time,Next State:  (8, 10.553846649940214) 3 21.0 987.860883917 (10.489125480251131, 9)
loss 3677.81982421875
Current State,action,reward,Response time,Next State:  (9, 10.489125480251131) 3 20.0 972.171495057 (10.448897752470936, 10)
loss 2016.5433349609375
Current State,action,reward,Response time,Next State:  (10, 10.448897752470936) 3 19.0 931.912703681 (10.433149880183072, 11)
loss 599.0714721679688
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 1 20.0 919.412094444 (10.44185150623065, 10)
loss 596.1846313476562
Current State,action,reward,Response time,Next State:  (10, 10.44185150623065) 1 21.0 931.538941947 (10.370942817486826, 9)
loss 389.4111328125
Current State,action,reward,Response time,Next State:  (9, 10.370942817486826) 4 19.0 965.985215893 (10.42733414151318, 11)
loss 1278.056640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 543.43408203125
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 4 19.0 917.051082408 (10.344006106602812, 11)
loss 400.7073669433594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 865.2380981445312
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 1600.795166015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 2364.637451171875
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 0 21.0 911.223233653 (10.268274366284802, 9)
loss 377.480712890625
Current State,action,reward,Response time,Next State:  (9, 10.268274366284802) 3 20.0 960.611029141 (10.335411397720526, 10)
loss 504.5584716796875
Current State,action,reward,Response time,Next State:  (10, 10.335411397720526) 2 20.0 925.892923039 (10.305649118067803, 10)
loss 3244.4130859375
Current State,action,reward,Response time,Next State:  (10, 10.305649118067803) 3 19.0 924.314209939 (10.24826025489064, 11)
loss 575.4332275390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 1020.1087036132812
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 538.5958251953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 438.1247253417969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 748.8289794921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 1541.136474609375
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 1 20.0 913.272125304 (10.333617326102203, 10)
loss 1703.171630859375
Current State,action,reward,Response time,Next State:  (10, 10.333617326102203) 0 22.0 925.797758139 (10.390165524255663, 8)
loss 1395.1739501953125
Current State,action,reward,Response time,Next State:  (8, 10.390165524255663) 1 23.0 978.294574081 (10.425974763084863, 7)
loss 451.8538818359375
Current State,action,reward,Response time,Next State:  (7, 10.425974763084863) 3 22.0 1025.54181472 (10.546025383098053, 8)
loss 396.4437561035156
Current State,action,reward,Response time,Next State:  (8, 10.546025383098053) 3 21.0 987.40377158 (10.655373370049301, 9)
loss 1404.9404296875
Current State,action,reward,Response time,Next State:  (9, 10.655373370049301) 1 22.0 980.873751654 (10.624473674922116, 8)
loss 711.93115234375
Current State,action,reward,Response time,Next State:  (8, 10.624473674922116) 3 21.0 991.988665914 (10.771376986314287, 9)
loss 1388.5870361328125
Current State,action,reward,Response time,Next State:  (9, 10.771376986314287) 3 20.0 986.945968488 (10.924797168745895, 10)
loss 386.80523681640625
Current State,action,reward,Response time,Next State:  (10, 10.924797168745895) 3 19.0 957.1563561 (11.039747673816453, 11)
loss 578.8999633789062
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 2 19.0 951.466016946 (11.271571944085663, 11)
loss 1479.5284423828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 1006.3914794921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 509.5594482421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 1189.34375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 593.8047485351562
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 4 19.0 1028.70793389 (13.168618569876575, 11)
loss 581.3675537109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 1596.3306884765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 1510.923583984375
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 0 21.0 1122.88439768 (14.677479537099185, 9)
loss 395.4874572753906
Current State,action,reward,Response time,Next State:  (9, 14.677479537099185) 0 23.0 1191.41116041 (15.353965082180355, 7)
loss 386.3034973144531
Current State,action,reward,Response time,Next State:  (7, 15.353965082180355) 3 22.0 1332.63806181 (15.836943704090487, 8)
loss 391.9814147949219
Current State,action,reward,Response time,Next State:  (8, 15.836943704090487) 3 21.0 1296.63040821 (16.466876895473597, 9)
loss 582.8428955078125
Current State,action,reward,Response time,Next State:  (9, 16.466876895473597) 4 19.0 1285.07728144 (16.871606159345866, 11)
loss 2280.114501953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 398.0193786621094
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 0 21.0 1294.6873044 (17.669285735563751, 9)
loss 1736.2774658203125
Current State,action,reward,Response time,Next State:  (9, 17.669285735563751) 3 20.0 1348.01745033 (17.944480812078613, 10)
loss 578.8156127929688
Current State,action,reward,Response time,Next State:  (10, 17.944480812078613) 3 19.0 1329.50910109 (18.385807405229915, 11)
loss 390.08453369140625
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 0 21.0 1339.64749699 (18.671267839956315, 9)
loss 581.8794555664062
Current State,action,reward,Response time,Next State:  (9, 18.671267839956315) 2 21.0 1400.46626871 (19.02839494033929, 9)
loss 394.99285888671875
Current State,action,reward,Response time,Next State:  (9, 19.02839494033929) 3 20.0 1419.16011 (19.286321916040979, 10)
loss 389.4991760253906
Current State,action,reward,Response time,Next State:  (10, 19.286321916040979) 3 19.0 1400.68584406 (19.340464848017284, 11)
loss 390.9128112792969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 396.93768310546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 395.3408203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 399.0727233886719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 390.80743408203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 391.8605041503906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 388.38397216796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 403.4159240722656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 390.6689147949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 397.6690368652344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 383.18499755859375
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 0 21.0 1278.56065924 (16.84211602880065, 9)
loss 392.17108154296875
Current State,action,reward,Response time,Next State:  (9, 16.84211602880065) 2 21.0 1304.71919827 (16.237094554670044, 9)
loss 392.6783752441406
Current State,action,reward,Response time,Next State:  (9, 16.237094554670044) 1 22.0 1273.04930988 (15.950694610794756, 8)
loss 390.59783935546875
Current State,action,reward,Response time,Next State:  (8, 15.950694610794756) 2 22.0 1303.27855664 (15.828704162850809, 8)
loss 392.60504150390625
Current State,action,reward,Response time,Next State:  (8, 15.828704162850809) 0 24.0 1296.14884991 (15.550833128512703, 6)
loss 391.1701965332031
Current State,action,reward,Response time,Next State:  (6, 15.550833128512703) 2 24.0 1425.91707068 (15.446694946204717, 6)
loss 385.5789489746094
Current State,action,reward,Response time,Next State:  (6, 15.446694946204717) 1 25.0 1419.0294644 (15.750501603468638, 5)
loss 398.5433349609375
Current State,action,reward,Response time,Next State:  (5, 15.750501603468638) 2 25.0 1704.75586919 (15.817158911312735, 5)
loss 396.5086975097656
Current State,action,reward,Response time,Next State:  (5, 15.817158911312735) 1 26.0 1710.88049068 (15.829956988360925, 4)
loss 391.6617736816406
Current State,action,reward,Response time,Next State:  (4, 15.829956988360925) 1 -75.5996054643 1983.42247739 (15.892373986997768, 3)
loss 389.2538757324219
Current State,action,reward,Response time,Next State:  (3, 15.892373986997768) 1 -234.285246906 3025.99605464 (15.954793861767499, 2)
loss 392.22222900390625
Action -2 not possible so Scaled down by 0
Current State,action,reward,Response time,Next State:  (2, 15.954793861767499) 1 -235.833706102 4622.85246906 (16.004586266677634, 2)
loss 384.8746032714844
Action -2 not possible so Scaled down by 0
Current State,action,reward,Response time,Next State:  (2, 16.004586266677634) 1 -236.241362763 4638.33706102 (16.017694914042416, 2)
loss 389.9353332519531
Action -2 not possible so Scaled down by 0
Current State,action,reward,Response time,Next State:  (2, 16.017694914042416) 1 -234.059890506 4642.41362763 (15.947547279389703, 2)
loss 387.9497375488281
Action -2 not possible so Scaled down by 0
Current State,action,reward,Response time,Next State:  (2, 15.947547279389703) 1 -239.256693905 4620.59890506 (16.11465619633363, 2)
loss 392.7267150878906
Action -2 not possible so Scaled down by 0
Current State,action,reward,Response time,Next State:  (2, 16.11465619633363) 1 -240.264968694 4672.56693905 (16.147078378791146, 2)
loss 395.8367004394531
Action -2 not possible so Scaled down by 0
Current State,action,reward,Response time,Next State:  (2, 16.147078378791146) 0 -242.820472693 4682.64968694 (16.229253414601111, 2)
loss 993.0443725585938
Action -2 not possible so Scaled down by 0
Current State,action,reward,Response time,Next State:  (2, 16.229253414601111) 0 -244.86883715 4708.20472693 (16.295120821876548, 2)
loss 403.5871887207031
Current State,action,reward,Response time,Next State:  (2, 16.295120821876548) 3 -90.8526865427 4728.6883715 (16.667936385136993, 3)
loss 961.9107666015625
Current State,action,reward,Response time,Next State:  (3, 16.667936385136993) 3 16.224304988 3178.52686543 (16.836383524612351, 4)
loss 1534.141357421875
Current State,action,reward,Response time,Next State:  (4, 16.836383524612351) 4 24.0 2097.75695012 (16.845818065953559, 6)
loss 980.68896484375
Current State,action,reward,Response time,Next State:  (6, 16.845818065953559) 1 25.0 1511.56621672 (17.052961248403161, 5)
loss 990.5977172851562
Current State,action,reward,Response time,Next State:  (5, 17.052961248403161) 4 23.0 1824.42875413 (17.215992726625572, 7)
loss 994.8967895507812
############ Running episode number: 93  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 393.1982421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 392.2563171386719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 399.40887451171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 930.2533569335938
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 4 19.0 974.154538113 (11.336751742492702, 11)
loss 1498.0628662109375
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 0 21.0 967.160346038 (11.25610796929319, 9)
loss 1030.8616943359375
Current State,action,reward,Response time,Next State:  (9, 11.25610796929319) 1 22.0 1012.3192433 (11.027107764209074, 8)
loss 2088.850830078125
Current State,action,reward,Response time,Next State:  (8, 11.027107764209074) 3 21.0 1015.52053272 (10.995673623987257, 9)
loss 968.21337890625
Current State,action,reward,Response time,Next State:  (9, 10.995673623987257) 3 20.0 998.686790566 (10.931193889570471, 10)
loss 388.2828063964844
Current State,action,reward,Response time,Next State:  (10, 10.931193889570471) 3 19.0 957.495664348 (10.816918347608043, 11)
loss 388.6311340332031
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 1 20.0 939.691239608 (10.819208572963639, 10)
loss 955.6887817382812
Current State,action,reward,Response time,Next State:  (10, 10.819208572963639) 3 19.0 951.555504911 (10.768325938188134, 11)
loss 2123.291259765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 385.89569091796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 962.6829223632812
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 980.8283081054688
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 2099.5634765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 403.4394226074219
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 1 20.0 925.789969445 (10.489125480251131, 10)
loss 990.4720458984375
Current State,action,reward,Response time,Next State:  (10, 10.489125480251131) 3 19.0 934.046546974 (10.448897752470936, 11)
loss 396.1997375488281
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 1 20.0 920.244245637 (10.433149880183072, 10)
loss 1013.4104614257812
Current State,action,reward,Response time,Next State:  (10, 10.433149880183072) 1 21.0 931.077372094 (10.44185150623065, 9)
loss 392.2158203125
Current State,action,reward,Response time,Next State:  (9, 10.44185150623065) 3 20.0 969.696935814 (10.370942817486826, 10)
loss 1510.392822265625
Current State,action,reward,Response time,Next State:  (10, 10.370942817486826) 3 19.0 927.777654938 (10.42733414151318, 11)
loss 991.9163208007812
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 4 19.0 919.104778396 (10.388469398680568, 11)
loss 1529.7181396484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 387.2872009277344
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 2 19.0 914.701547126 (10.319026962956018, 11)
loss 395.6908874511719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 955.9352416992188
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 1 20.0 912.494916918 (10.278181486298042, 10)
loss 388.5386047363281
Current State,action,reward,Response time,Next State:  (10, 10.278181486298042) 3 19.0 922.857214352 (10.268274366284802, 11)
loss 392.4070739746094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 529.1917724609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 950.0960083007812
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 440.0173645019531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 1532.66455078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 381.99072265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 388.2318115234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 1514.0369873046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 925.0354614257812
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 986.7308959960938
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 0 21.0 914.152581784 (10.390165524255663, 9)
loss 382.1528625488281
Current State,action,reward,Response time,Next State:  (9, 10.390165524255663) 3 20.0 966.991429728 (10.425974763084863, 10)
loss 952.3344116210938
Current State,action,reward,Response time,Next State:  (10, 10.425974763084863) 3 19.0 930.696774523 (10.546025383098053, 11)
loss 392.3736877441406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 1523.3277587890625
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 0 21.0 931.154858096 (10.624473674922116, 9)
loss 395.9620666503906
Current State,action,reward,Response time,Next State:  (9, 10.624473674922116) 3 20.0 979.256305105 (10.771376986314287, 10)
loss 967.9852905273438
Current State,action,reward,Response time,Next State:  (10, 10.771376986314287) 0 22.0 949.018321829 (10.924797168745895, 8)
loss 1559.453369140625
Current State,action,reward,Response time,Next State:  (8, 10.924797168745895) 0 24.0 1009.54101094 (11.039747673816453, 6)
loss 384.4093933105469
Current State,action,reward,Response time,Next State:  (6, 11.039747673816453) 3 23.0 1127.55791161 (11.271571944085663, 7)
loss 391.713623046875
Current State,action,reward,Response time,Next State:  (7, 11.271571944085663) 3 22.0 1078.23666679 (11.670334358779868, 8)
loss 444.0380859375
Current State,action,reward,Response time,Next State:  (8, 11.670334358779868) 3 21.0 1053.11377918 (11.819721938468785, 9)
loss 395.2175598144531
Current State,action,reward,Response time,Next State:  (9, 11.819721938468785) 1 22.0 1041.82165315 (12.19918626616789, 8)
loss 987.2660522460938
Current State,action,reward,Response time,Next State:  (8, 12.19918626616789) 3 21.0 1084.02242049 (12.501496275411796, 9)
loss 936.9141845703125
Current State,action,reward,Response time,Next State:  (9, 12.501496275411796) 3 20.0 1077.50917513 (13.168618569876575, 10)
loss 971.655517578125
Current State,action,reward,Response time,Next State:  (10, 13.168618569876575) 2 20.0 1076.17782493 (13.649658108197247, 10)
loss 977.9392700195312
Current State,action,reward,Response time,Next State:  (10, 13.649658108197247) 1 21.0 1101.69413046 (14.283719188889453, 9)
loss 393.9456481933594
Current State,action,reward,Response time,Next State:  (9, 14.283719188889453) 3 20.0 1170.79974938 (14.677479537099185, 10)
loss 934.6790161132812
Current State,action,reward,Response time,Next State:  (10, 14.677479537099185) 3 19.0 1156.21398489 (15.353965082180355, 11)
loss 1567.8460693359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 941.7164306640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 1009.4937133789062
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 466.6092529296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 947.2766723632812
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 520.60107421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 946.2952880859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 464.3818664550781
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 0 21.0 1339.64749699 (18.671267839956315, 9)
loss 401.64666748046875
Current State,action,reward,Response time,Next State:  (9, 18.671267839956315) 3 20.0 1400.46626871 (19.02839494033929, 10)
loss 387.5852966308594
Current State,action,reward,Response time,Next State:  (10, 19.02839494033929) 3 19.0 1387.00434183 (19.286321916040979, 11)
loss 390.2369689941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 388.7605285644531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 2086.388671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 1024.072021484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 941.2720336914062
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 404.4114685058594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 1501.6329345703125
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 0 21.0 1385.62570908 (19.08360399753829, 9)
loss 467.9004211425781
Current State,action,reward,Response time,Next State:  (9, 19.08360399753829) 0 23.0 1422.05003169 (18.668181536495972, 7)
loss 398.9647521972656
Current State,action,reward,Response time,Next State:  (7, 18.668181536495972) 4 21.0 1539.16919707 (18.375894992990247, 9)
loss 962.342041015625
Current State,action,reward,Response time,Next State:  (9, 18.375894992990247) 4 19.0 1385.00495784 (17.82724819986867, 11)
loss 388.4122619628906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 385.94464111328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 960.4178466796875
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 1 20.0 1258.07554888 (16.237094554670044, 10)
loss 945.0880126953125
Current State,action,reward,Response time,Next State:  (10, 16.237094554670044) 3 19.0 1238.94234737 (15.950694610794756, 11)
loss 448.2161865234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 393.2614440917969
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 0 21.0 1204.52470225 (15.550833128512703, 9)
loss 2118.096923828125
Current State,action,reward,Response time,Next State:  (9, 15.550833128512703) 4 19.0 1237.12691092 (15.446694946204717, 11)
loss 405.9380187988281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 384.8226623535156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 402.6691589355469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 961.8912353515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 389.25640869140625
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 0 21.0 1207.88915169 (15.954793861767499, 9)
loss 943.6090087890625
Current State,action,reward,Response time,Next State:  (9, 15.954793861767499) 1 22.0 1258.27226176 (16.004586266677634, 8)
loss 2093.439453125
Current State,action,reward,Response time,Next State:  (8, 16.004586266677634) 3 21.0 1306.42824342 (16.017694914042416, 9)
loss 520.6082763671875
Current State,action,reward,Response time,Next State:  (9, 16.017694914042416) 3 20.0 1261.56482143 (15.947547279389703, 10)
loss 1581.541259765625
Current State,action,reward,Response time,Next State:  (10, 15.947547279389703) 3 19.0 1223.58357506 (16.11465619633363, 11)
loss 473.0542297363281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 935.3504638671875
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 4 19.0 1221.34827555 (16.229253414601111, 11)
loss 947.6250610351562
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 2 19.0 1225.69057988 (16.295120821876548, 11)
loss 991.4054565429688
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 1604.1326904296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 396.7843933105469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 1577.3585205078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 943.992431640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 467.3908386230469
############ Running episode number: 94  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 970.8633422851562
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 399.5579833984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 949.8541259765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 947.9918212890625
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 2 19.0 974.154538113 (11.336751742492702, 11)
loss 943.9915161132812
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 4 19.0 967.160346038 (11.25610796929319, 11)
loss 393.6120300292969
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 1 20.0 962.898956888 (11.027107764209074, 10)
loss 473.99676513671875
Current State,action,reward,Response time,Next State:  (10, 11.027107764209074) 3 19.0 962.583328739 (10.995673623987257, 11)
loss 398.0910949707031
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 4 19.0 949.137050055 (10.931193889570471, 11)
loss 396.18536376953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 471.3580017089844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 450.7209777832031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 392.1322937011719
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 2 19.0 937.12351295 (10.772009508959538, 11)
loss 474.77569580078125
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 4 19.0 937.318160694 (10.644925616761762, 11)
loss 1571.097900390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 407.89013671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 943.8697509765625
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 2 19.0 925.738299342 (10.553846649940214, 11)
loss 376.33599853515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 389.1512451171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 406.4814147949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 1522.19873046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 395.8005676269531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 390.0809020996094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 397.1270751953125
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 2 19.0 919.104778396 (10.388469398680568, 11)
loss 1518.6630859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 1491.5465087890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 964.34814453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 396.7457275390625
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 2 19.0 912.494916918 (10.278181486298042, 11)
loss 388.7817687988281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 935.0206909179688
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 932.8831176757812
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 977.0216674804688
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 1 20.0 912.67468196 (10.24826025489064, 10)
loss 389.49505615234375
Current State,action,reward,Response time,Next State:  (10, 10.24826025489064) 0 22.0 921.2700698 (10.276491935146446, 8)
loss 954.4376220703125
Current State,action,reward,Response time,Next State:  (8, 10.276491935146446) 3 21.0 971.650944469 (10.236991269871366, 9)
loss 960.0410766601562
Current State,action,reward,Response time,Next State:  (9, 10.236991269871366) 0 23.0 958.973513426 (10.236272697871373, 7)
loss 964.1841430664062
Current State,action,reward,Response time,Next State:  (7, 10.236272697871373) 0 25.0 1013.72020216 (10.369891240151098, 5)
loss 942.5673828125
Current State,action,reward,Response time,Next State:  (5, 10.369891240151098) 1 26.0 1210.37343867 (10.316955310454549, 4)
loss 971.9302978515625
Current State,action,reward,Response time,Next State:  (4, 10.316955310454549) 3 25.0 1357.1212847 (10.333617326102203, 5)
loss 453.2445373535156
Current State,action,reward,Response time,Next State:  (5, 10.333617326102203) 4 23.0 1207.04051089 (10.390165524255663, 7)
loss 394.3726806640625
Current State,action,reward,Response time,Next State:  (7, 10.390165524255663) 2 23.0 1023.31030002 (10.425974763084863, 7)
loss 1024.0189208984375
Current State,action,reward,Response time,Next State:  (7, 10.425974763084863) 3 22.0 1025.54181472 (10.546025383098053, 8)
loss 401.6840515136719
Current State,action,reward,Response time,Next State:  (8, 10.546025383098053) 3 21.0 987.40377158 (10.655373370049301, 9)
loss 942.2176513671875
Current State,action,reward,Response time,Next State:  (9, 10.655373370049301) 3 20.0 980.873751654 (10.624473674922116, 10)
loss 952.1834106445312
Current State,action,reward,Response time,Next State:  (10, 10.624473674922116) 3 19.0 941.225969064 (10.771376986314287, 11)
loss 1516.0894775390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 392.7992858886719
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 1 20.0 945.391786838 (11.039747673816453, 10)
loss 936.3970336914062
Current State,action,reward,Response time,Next State:  (10, 11.039747673816453) 3 19.0 963.253801267 (11.271571944085663, 11)
loss 933.5966186523438
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 1568.4296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 958.293212890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 928.9115600585938
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 969.3785400390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 463.6483459472656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 936.2771606445312
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 1 20.0 1089.37925646 (14.283719188889453, 10)
loss 957.9124755859375
Current State,action,reward,Response time,Next State:  (10, 14.283719188889453) 3 19.0 1135.32732476 (14.677479537099185, 11)
loss 964.0826416015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 391.96478271484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 1528.53369140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 951.7777099609375
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 2 19.0 1238.24711194 (16.871606159345866, 11)
loss 987.6033325195312
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 4 19.0 1259.63387034 (17.534967586021782, 11)
loss 444.80279541015625
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 2 19.0 1294.6873044 (17.669285735563751, 11)
loss 941.972900390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 3278.99072265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 977.5383911132812
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 942.45703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 940.3909301757812
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 1 20.0 1373.60319427 (19.286321916040979, 10)
loss 389.2041015625
Current State,action,reward,Response time,Next State:  (10, 19.286321916040979) 3 19.0 1400.68584406 (19.340464848017284, 11)
loss 1025.782470703125
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 0 21.0 1390.09363446 (19.213467265587269, 9)
loss 2201.398193359375
Current State,action,reward,Response time,Next State:  (9, 19.213467265587269) 0 23.0 1428.84773289 (19.140765783401285, 7)
loss 930.6159057617188
Current State,action,reward,Response time,Next State:  (7, 19.140765783401285) 3 22.0 1568.61910246 (19.385636054792762, 8)
loss 1557.8763427734375
Current State,action,reward,Response time,Next State:  (8, 19.385636054792762) 3 21.0 1504.03300517 (19.223969507401588, 9)
loss 981.80419921875
Current State,action,reward,Response time,Next State:  (9, 19.223969507401588) 3 20.0 1429.39747342 (19.25591252280865, 10)
loss 1486.45068359375
Current State,action,reward,Response time,Next State:  (10, 19.25591252280865) 3 19.0 1399.0728054 (19.08360399753829, 11)
loss 1489.4312744140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 973.41162109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 980.6024780273438
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 1511.619873046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 981.8219604492188
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 394.13641357421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 986.3672485351562
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 1 20.0 1226.10492247 (15.950694610794756, 10)
loss 387.9855651855469
Current State,action,reward,Response time,Next State:  (10, 15.950694610794756) 3 19.0 1223.7505224 (15.828704162850809, 11)
loss 383.419189453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 965.835693359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 387.04302978515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 391.4339294433594
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 2 19.0 1200.39231205 (15.817158911312735, 11)
loss 391.95172119140625
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 1 20.0 1203.91462651 (15.829956988360925, 10)
loss 2112.4130859375
Current State,action,reward,Response time,Next State:  (10, 15.829956988360925) 0 22.0 1217.34610485 (15.892373986997768, 8)
loss 986.6382446289062
Current State,action,reward,Response time,Next State:  (8, 15.892373986997768) 3 21.0 1299.87001973 (15.954793861767499, 9)
loss 998.8012084960938
Current State,action,reward,Response time,Next State:  (9, 15.954793861767499) 3 20.0 1258.27226176 (16.004586266677634, 10)
loss 393.9289245605469
Current State,action,reward,Response time,Next State:  (10, 16.004586266677634) 3 19.0 1226.60915635 (16.017694914042416, 11)
loss 956.1190185546875
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 1 20.0 1214.51137704 (15.947547279389703, 10)
loss 1598.6541748046875
Action +2 not possible so Scaled up by 1
Current State,action,reward,Response time,Next State:  (10, 15.947547279389703) 4 19.0 1223.58357506 (16.11465619633363, 11)
loss 388.8316345214844
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 2 19.0 1219.63501821 (16.147078378791146, 11)
loss 933.6281127929688
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 932.0855102539062
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 471.2304992675781
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 0 21.0 1229.17115431 (16.667936385136993, 9)
loss 1020.9229125976562
Current State,action,reward,Response time,Next State:  (9, 16.667936385136993) 3 20.0 1295.6017535 (16.836383524612351, 10)
loss 389.7441711425781
Current State,action,reward,Response time,Next State:  (10, 16.836383524612351) 1 21.0 1270.73108663 (16.845818065953559, 9)
loss 984.013671875
Current State,action,reward,Response time,Next State:  (9, 16.845818065953559) 3 20.0 1304.91298164 (17.052961248403161, 10)
loss 389.3616638183594
Current State,action,reward,Response time,Next State:  (10, 17.052961248403161) 3 19.0 1282.21925533 (17.215992726625572, 11)
loss 1577.77099609375
############ Running episode number: 95  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 391.1922912597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 941.2940673828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 467.1533203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 942.7349853515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 382.82073974609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 962.4785766601562
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 945.4038696289062
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 393.0673828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 932.814697265625
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 2 19.0 945.729803224 (10.816918347608043, 11)
loss 1024.9696044921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 388.9784851074219
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 2 19.0 939.812260006 (10.768325938188134, 11)
loss 382.7142639160156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 974.9994506835938
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 2 19.0 937.318160694 (10.644925616761762, 11)
loss 394.11199951171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 392.5901794433594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 382.3340759277344
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 4 19.0 925.738299342 (10.553846649940214, 11)
loss 468.2848815917969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 390.7107238769531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 394.9587707519531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 439.07843017578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 990.1285400390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 976.8997802734375
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 2 19.0 916.124940439 (10.42733414151318, 11)
loss 1544.726806640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 991.7911987304688
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 403.8747253417969
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 1 20.0 914.701547126 (10.319026962956018, 10)
loss 1542.0838623046875
Current State,action,reward,Response time,Next State:  (10, 10.319026962956018) 3 19.0 925.023825574 (10.30224719189987, 11)
loss 1000.7116088867188
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 2079.1240234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 1507.3665771484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 389.2840576171875
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 2 19.0 914.247384359 (10.305649118067803, 11)
loss 464.24114990234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 1029.913330078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 934.21875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 2047.7208251953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 390.42486572265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 383.3797607421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 383.0558776855469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 931.0615234375
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 2 19.0 914.152581784 (10.390165524255663, 11)
loss 957.8279418945312
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 932.155029296875
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 0 21.0 919.032945938 (10.546025383098053, 9)
loss 954.6383666992188
Current State,action,reward,Response time,Next State:  (9, 10.546025383098053) 4 19.0 975.14992417 (10.655373370049301, 11)
loss 1532.5362548828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 388.9707336425781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 941.731201171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 936.2529296875
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 1 20.0 945.391786838 (11.039747673816453, 10)
loss 939.5862426757812
Action +2 not possible so Scaled up by 1
Current State,action,reward,Response time,Next State:  (10, 11.039747673816453) 4 19.0 963.253801267 (11.271571944085663, 11)
loss 936.3848266601562
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 1533.03857421875
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 0 21.0 984.787563682 (11.819721938468785, 9)
loss 932.775146484375
Current State,action,reward,Response time,Next State:  (9, 11.819721938468785) 3 20.0 1041.82165315 (12.19918626616789, 10)
loss 391.65911865234375
Current State,action,reward,Response time,Next State:  (10, 12.19918626616789) 3 19.0 1024.75516863 (12.501496275411796, 11)
loss 933.7653198242188
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 959.2196655273438
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 462.04681396484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 944.794189453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 1485.22021484375
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 1 20.0 1143.69153516 (15.353965082180355, 10)
loss 1544.0665283203125
Current State,action,reward,Response time,Next State:  (10, 15.353965082180355) 3 19.0 1192.09754638 (15.836943704090487, 11)
loss 987.41064453125
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 0 21.0 1204.9600972 (16.466876895473597, 9)
loss 391.5777587890625
Current State,action,reward,Response time,Next State:  (9, 16.466876895473597) 3 20.0 1285.07728144 (16.871606159345866, 10)
loss 932.8546142578125
Current State,action,reward,Response time,Next State:  (10, 16.871606159345866) 3 19.0 1272.5994393 (17.534967586021782, 11)
loss 941.7844848632812
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 1028.876220703125
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 1 20.0 1301.78496219 (17.944480812078613, 10)
loss 963.4295654296875
Current State,action,reward,Response time,Next State:  (10, 17.944480812078613) 1 21.0 1329.50910109 (18.385807405229915, 9)
loss 1602.888671875
Current State,action,reward,Response time,Next State:  (9, 18.385807405229915) 3 20.0 1385.5238237 (18.671267839956315, 10)
loss 936.824951171875
Current State,action,reward,Response time,Next State:  (10, 18.671267839956315) 3 19.0 1368.06085906 (19.02839494033929, 11)
loss 958.0958251953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 963.7500610351562
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 4 19.0 1387.23260634 (19.340464848017284, 11)
loss 390.2096862792969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 957.0431518554688
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 943.4902954101562
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 2 19.0 1379.54110953 (19.385636054792762, 11)
loss 2112.15576171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 1611.08837890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 1537.0
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 1487.329345703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 931.2705078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 2052.438720703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 381.1075134277344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 937.308349609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 1492.1153564453125
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 4 19.0 1258.07554888 (16.237094554670044, 11)
loss 1024.60693359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 969.245361328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 952.2252807617188
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 940.6411743164062
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 2079.4140625
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 1 20.0 1184.33851965 (15.750501603468638, 10)
loss 1546.8909912109375
Current State,action,reward,Response time,Next State:  (10, 15.750501603468638) 3 19.0 1213.1314661 (15.817158911312735, 11)
loss 1523.416259765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 1502.95849609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 966.0630493164062
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 380.4795837402344
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 1 20.0 1211.18755114 (16.004586266677634, 10)
loss 464.71533203125
Current State,action,reward,Response time,Next State:  (10, 16.004586266677634) 3 19.0 1226.60915635 (16.017694914042416, 11)
loss 1038.6834716796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 387.72265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 392.8857421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 390.359619140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 965.5474243164062
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 386.77008056640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 1044.153564453125
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 4 19.0 1248.87152463 (16.836383524612351, 11)
loss 380.71209716796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 974.8875122070312
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 1539.4462890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 937.1190185546875
############ Running episode number: 96  ##############
Action +2 not possible so Scaled up by 1
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 4 19.0 1029.06659875 (11.973514343585284, 11)
loss 928.53125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 389.950439453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 390.8662109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 382.3695983886719
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 0 21.0 974.154538113 (11.336751742492702, 9)
loss 387.5625915527344
Current State,action,reward,Response time,Next State:  (9, 11.336751742492702) 2 21.0 1016.54054685 (11.25610796929319, 9)
loss 939.8798828125
Current State,action,reward,Response time,Next State:  (9, 11.25610796929319) 3 20.0 1012.3192433 (11.027107764209074, 10)
loss 925.8826293945312
Current State,action,reward,Response time,Next State:  (10, 11.027107764209074) 3 19.0 962.583328739 (10.995673623987257, 11)
loss 952.4512939453125
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 0 21.0 949.137050055 (10.931193889570471, 9)
loss 1012.1907958984375
Current State,action,reward,Response time,Next State:  (9, 10.931193889570471) 3 20.0 995.311594677 (10.816918347608043, 10)
loss 472.0890197753906
Current State,action,reward,Response time,Next State:  (10, 10.816918347608043) 3 19.0 951.434021987 (10.819208572963639, 11)
loss 1504.069091796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 957.8320922851562
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 387.08306884765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 935.3590698242188
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 1509.0751953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 469.75177001953125
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 0 21.0 925.738299342 (10.553846649940214, 9)
loss 1504.8931884765625
Current State,action,reward,Response time,Next State:  (9, 10.553846649940214) 3 20.0 975.559328891 (10.489125480251131, 10)
loss 387.7391052246094
Current State,action,reward,Response time,Next State:  (10, 10.489125480251131) 3 19.0 934.046546974 (10.448897752470936, 11)
loss 2144.3330078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 2633.54638671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 1520.1171875
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 2 19.0 919.871906942 (10.370942817486826, 11)
loss 1513.478271484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 989.830078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 384.47882080078125
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 1 20.0 917.051082408 (10.344006106602812, 10)
loss 973.3617553710938
Current State,action,reward,Response time,Next State:  (10, 10.344006106602812) 3 19.0 926.348821567 (10.319026962956018, 11)
loss 388.10992431640625
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 1 20.0 913.381595845 (10.30224719189987, 10)
loss 1568.17236328125
Current State,action,reward,Response time,Next State:  (10, 10.30224719189987) 3 19.0 924.133757854 (10.278181486298042, 11)
loss 385.9488220214844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 1532.4459228515625
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 1 20.0 910.69972028 (10.335411397720526, 10)
loss 1586.256103515625
Current State,action,reward,Response time,Next State:  (10, 10.335411397720526) 3 19.0 925.892923039 (10.305649118067803, 11)
loss 976.083984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 1478.09912109375
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 1 20.0 909.642131904 (10.276491935146446, 10)
loss 385.8101806640625
Current State,action,reward,Response time,Next State:  (10, 10.276491935146446) 1 21.0 922.767593645 (10.236991269871366, 9)
loss 1495.07373046875
Current State,action,reward,Response time,Next State:  (9, 10.236991269871366) 3 20.0 958.973513426 (10.236272697871373, 10)
loss 398.7438659667969
Current State,action,reward,Response time,Next State:  (10, 10.236272697871373) 3 19.0 920.634200723 (10.369891240151098, 11)
loss 383.13995361328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 935.3538208007812
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 0 21.0 913.272125304 (10.333617326102203, 9)
loss 389.873291015625
Current State,action,reward,Response time,Next State:  (9, 10.333617326102203) 3 20.0 964.03141062 (10.390165524255663, 10)
loss 1544.31982421875
Current State,action,reward,Response time,Next State:  (10, 10.390165524255663) 3 19.0 928.797305964 (10.425974763084863, 11)
loss 379.0980224609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 944.0032958984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 388.6209716796875
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 1 20.0 931.154858096 (10.624473674922116, 10)
loss 390.18194580078125
Action +2 not possible so Scaled up by 1
Current State,action,reward,Response time,Next State:  (10, 10.624473674922116) 4 19.0 941.225969064 (10.771376986314287, 11)
loss 393.53021240234375
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 1 20.0 937.284736847 (10.924797168745895, 10)
loss 387.74542236328125
Current State,action,reward,Response time,Next State:  (10, 10.924797168745895) 3 19.0 957.1563561 (11.039747673816453, 11)
loss 928.26904296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 1525.1890869140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 1026.303955078125
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 2 19.0 984.787563682 (11.819721938468785, 11)
loss 962.5817260742188
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 384.04742431640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 1609.0625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 381.8434753417969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 926.4610595703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 1542.646728515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 472.9504089355469
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 4 19.0 1143.69153516 (15.353965082180355, 11)
loss 934.897705078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 436.5177307128906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 381.6077575683594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 391.8689880371094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 382.8504333496094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 943.2705688476562
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 441.9632873535156
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 2 19.0 1316.32685758 (18.385807405229915, 11)
loss 384.30609130859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 938.5689086914062
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 1037.2279052734375
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 1 20.0 1373.60319427 (19.286321916040979, 10)
loss 936.877685546875
Current State,action,reward,Response time,Next State:  (10, 19.286321916040979) 3 19.0 1400.68584406 (19.340464848017284, 11)
loss 966.4232177734375
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 2 19.0 1390.09363446 (19.213467265587269, 11)
loss 386.1151123046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 380.0527038574219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 1502.27880859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 1527.232421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 381.0990295410156
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 0 21.0 1385.62570908 (19.08360399753829, 9)
loss 991.3018188476562
Current State,action,reward,Response time,Next State:  (9, 19.08360399753829) 3 20.0 1422.05003169 (18.668181536495972, 10)
loss 390.0914001464844
Current State,action,reward,Response time,Next State:  (10, 18.668181536495972) 3 19.0 1367.89714889 (18.375894992990247, 11)
loss 379.955322265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 941.8734741210938
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 2 19.0 1310.13203606 (17.229782241685768, 11)
loss 939.1026000976562
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 947.918212890625
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 4 19.0 1258.07554888 (16.237094554670044, 11)
loss 937.552490234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 960.013427734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 1558.867919921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 972.5469360351562
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 951.8395385742188
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 2139.49365234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 394.1162109375
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 4 19.0 1203.91462651 (15.829956988360925, 11)
loss 441.1591491699219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 979.2229614257812
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 2 19.0 1207.88915169 (15.954793861767499, 11)
loss 939.546142578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 382.0704650878906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 387.04656982421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 929.354736328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 385.8045349121094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 1530.1220703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 990.7315673828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 1020.3018798828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 1551.120849609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 931.2691040039062
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 929.0741577148438
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 1 20.0 1258.27117243 (17.052961248403161, 10)
loss 2114.465576171875
Current State,action,reward,Response time,Next State:  (10, 17.052961248403161) 3 19.0 1282.21925533 (17.215992726625572, 11)
loss 1624.8536376953125
############ Running episode number: 97  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 0 22.0 1029.06659875 (11.973514343585284, 8)
loss 973.5213012695312
Current State,action,reward,Response time,Next State:  (8, 11.973514343585284) 3 21.0 1070.83307124 (11.786394321941378, 9)
loss 1529.1318359375
Current State,action,reward,Response time,Next State:  (9, 11.786394321941378) 3 20.0 1040.0771169 (11.61852219546234, 10)
loss 439.0196533203125
Current State,action,reward,Response time,Next State:  (10, 11.61852219546234) 0 22.0 993.95437024 (11.469111876584304, 8)
loss 1474.5313720703125
Current State,action,reward,Response time,Next State:  (8, 11.469111876584304) 3 21.0 1041.35337246 (11.336751742492702, 9)
loss 1529.1375732421875
Current State,action,reward,Response time,Next State:  (9, 11.336751742492702) 3 20.0 1016.54054685 (11.25610796929319, 10)
loss 387.2931823730469
Current State,action,reward,Response time,Next State:  (10, 11.25610796929319) 3 19.0 974.730436685 (11.027107764209074, 11)
loss 956.3920288085938
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 385.693603515625
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 1 20.0 949.137050055 (10.931193889570471, 10)
loss 1592.7772216796875
Current State,action,reward,Response time,Next State:  (10, 10.931193889570471) 3 19.0 957.495664348 (10.816918347608043, 11)
loss 1482.5562744140625
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 4 19.0 939.691239608 (10.819208572963639, 11)
loss 932.0397338867188
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 935.6741333007812
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 1520.4927978515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 1526.49462890625
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 4 19.0 930.602776506 (10.58735855349979, 11)
loss 383.9920349121094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 2118.1181640625
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 4 19.0 925.738299342 (10.553846649940214, 11)
loss 375.5527648925781
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 0 21.0 925.789969445 (10.489125480251131, 9)
loss 945.3418579101562
Current State,action,reward,Response time,Next State:  (9, 10.489125480251131) 3 20.0 972.171495057 (10.448897752470936, 10)
loss 963.603759765625
Current State,action,reward,Response time,Next State:  (10, 10.448897752470936) 3 19.0 931.912703681 (10.433149880183072, 11)
loss 1576.6339111328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 390.43353271484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 388.8685607910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 944.7052612304688
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 4 19.0 919.104778396 (10.388469398680568, 11)
loss 1520.363037109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 387.3533020019531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 963.98193359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 383.55487060546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 474.86834716796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 941.3760986328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 379.7772216796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 1010.01904296875
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 0 21.0 912.67468196 (10.24826025489064, 9)
loss 383.32220458984375
Current State,action,reward,Response time,Next State:  (9, 10.24826025489064) 3 20.0 959.563389179 (10.276491935146446, 10)
loss 385.6348571777344
Current State,action,reward,Response time,Next State:  (10, 10.276491935146446) 3 19.0 922.767593645 (10.236991269871366, 11)
loss 386.3724365234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 939.5928344726562
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 2 19.0 909.008683798 (10.369891240151098, 11)
loss 381.4495544433594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 958.886962890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 994.2131958007812
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 379.9956970214844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 1546.9698486328125
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 4 19.0 919.032945938 (10.546025383098053, 11)
loss 1505.0147705078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 393.76544189453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 977.0199584960938
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 1030.346923828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 963.7742919921875
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 2 19.0 945.391786838 (11.039747673816453, 11)
loss 462.8606262207031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 2651.392822265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 438.9095153808594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 389.2691345214844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 979.3265991210938
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 2 19.0 1012.73322757 (12.501496275411796, 11)
loss 380.2651672363281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 1029.7430419921875
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 2 19.0 1063.96010023 (13.649658108197247, 11)
loss 377.92864990234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 969.4563598632812
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 401.00128173828125
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 4 19.0 1143.69153516 (15.353965082180355, 11)
loss 469.9422912597656
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 2 19.0 1179.43847566 (15.836943704090487, 11)
loss 925.0145874023438
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 2 19.0 1204.9600972 (16.466876895473597, 11)
loss 948.4760131835938
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 436.16436767578125
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 0 21.0 1259.63387034 (17.534967586021782, 9)
loss 1051.2322998046875
Current State,action,reward,Response time,Next State:  (9, 17.534967586021782) 3 20.0 1340.98655806 (17.669285735563751, 10)
loss 388.8948669433594
Current State,action,reward,Response time,Next State:  (10, 17.669285735563751) 3 19.0 1314.91162813 (17.944480812078613, 11)
loss 378.9598083496094
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 2 19.0 1316.32685758 (18.385807405229915, 11)
loss 442.0365905761719
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 1 20.0 1339.64749699 (18.671267839956315, 10)
loss 926.927490234375
Action +2 not possible so Scaled up by 1
Current State,action,reward,Response time,Next State:  (10, 18.671267839956315) 4 19.0 1368.06085906 (19.02839494033929, 11)
loss 1044.9317626953125
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 0 21.0 1373.60319427 (19.286321916040979, 9)
loss 932.5291748046875
Current State,action,reward,Response time,Next State:  (9, 19.286321916040979) 3 20.0 1432.66131431 (19.340464848017284, 10)
loss 947.9130859375
Current State,action,reward,Response time,Next State:  (10, 19.340464848017284) 3 19.0 1403.55780672 (19.213467265587269, 11)
loss 384.1431884765625
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 4 19.0 1383.38281107 (19.140765783401285, 11)
loss 386.2037048339844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 925.0964965820312
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 2 19.0 1392.48057747 (19.223969507401588, 11)
loss 940.6229248046875
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 1 20.0 1383.93777195 (19.25591252280865, 10)
loss 391.8349914550781
Current State,action,reward,Response time,Next State:  (10, 19.25591252280865) 3 19.0 1399.0728054 (19.08360399753829, 11)
loss 1522.354248046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 387.1446838378906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 439.280029296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 945.040771484375
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 0 21.0 1310.13203606 (17.229782241685768, 9)
loss 956.1688842773438
Current State,action,reward,Response time,Next State:  (9, 17.229782241685768) 0 23.0 1325.01161138 (16.84211602880065, 7)
loss 474.699951171875
Current State,action,reward,Response time,Next State:  (7, 16.84211602880065) 3 22.0 1425.37476404 (16.237094554670044, 8)
loss 378.91949462890625
Current State,action,reward,Response time,Next State:  (8, 16.237094554670044) 3 21.0 1320.01714264 (15.950694610794756, 9)
loss 383.64654541015625
Current State,action,reward,Response time,Next State:  (9, 15.950694610794756) 3 20.0 1258.0576862 (15.828704162850809, 10)
loss 380.9079895019531
Current State,action,reward,Response time,Next State:  (10, 15.828704162850809) 3 19.0 1217.27964986 (15.550833128512703, 11)
loss 1058.061767578125
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 4 19.0 1189.84140354 (15.446694946204717, 11)
loss 936.828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 379.8370056152344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 454.73590087890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 374.4977722167969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 389.24774169921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 1538.1324462890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 383.4561462402344
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 0 21.0 1213.81868812 (16.017694914042416, 9)
loss 2667.007080078125
Current State,action,reward,Response time,Next State:  (9, 16.017694914042416) 3 20.0 1261.56482143 (15.947547279389703, 10)
loss 929.49072265625
Current State,action,reward,Response time,Next State:  (10, 15.947547279389703) 3 19.0 1223.58357506 (16.11465619633363, 11)
loss 991.86279296875
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 0 21.0 1219.63501821 (16.147078378791146, 9)
loss 948.7548217773438
Current State,action,reward,Response time,Next State:  (9, 16.147078378791146) 3 20.0 1268.3374073 (16.229253414601111, 10)
loss 382.43377685546875
Current State,action,reward,Response time,Next State:  (10, 16.229253414601111) 2 20.0 1238.52642122 (16.295120821876548, 10)
loss 1490.2574462890625
Current State,action,reward,Response time,Next State:  (10, 16.295120821876548) 3 19.0 1242.02029803 (16.667936385136993, 11)
loss 390.2795104980469
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 2 19.0 1248.87152463 (16.836383524612351, 11)
loss 1548.4967041015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 436.39373779296875
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 2 19.0 1258.27117243 (17.052961248403161, 11)
loss 384.4624938964844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 935.8241577148438
############ Running episode number: 98  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 383.9161071777344
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 0 21.0 1000.80824137 (11.786394321941378, 9)
loss 956.0043334960938
Current State,action,reward,Response time,Next State:  (9, 11.786394321941378) 3 20.0 1040.0771169 (11.61852219546234, 10)
loss 379.490478515625
Current State,action,reward,Response time,Next State:  (10, 11.61852219546234) 3 19.0 993.95437024 (11.469111876584304, 11)
loss 947.6045532226562
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 4 19.0 974.154538113 (11.336751742492702, 11)
loss 388.6792297363281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 381.4273986816406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 1480.0235595703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 1528.2786865234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 982.9220581054688
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 925.90380859375
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 2 19.0 939.691239608 (10.819208572963639, 11)
loss 1511.9765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 942.4327392578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 1537.0804443359375
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 1 20.0 937.318160694 (10.644925616761762, 10)
loss 378.2118835449219
Current State,action,reward,Response time,Next State:  (10, 10.644925616761762) 0 22.0 942.310823749 (10.58735855349979, 8)
loss 945.5953979492188
Current State,action,reward,Response time,Next State:  (8, 10.58735855349979) 3 21.0 989.819480251 (10.552868829802469, 9)
loss 982.7495727539062
Current State,action,reward,Response time,Next State:  (9, 10.552868829802469) 3 20.0 975.508144832 (10.553846649940214, 10)
loss 391.7557373046875
Current State,action,reward,Response time,Next State:  (10, 10.553846649940214) 3 19.0 937.479622653 (10.489125480251131, 11)
loss 925.3836669921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 468.9091491699219
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 2 19.0 920.244245637 (10.433149880183072, 11)
loss 1488.4830322265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 384.1134948730469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 384.7532043457031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 440.080322265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 388.9074401855469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 954.5650634765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 932.703369140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 2636.342041015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 1534.4234619140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 2028.6702880859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 379.38818359375
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 4 19.0 914.247384359 (10.305649118067803, 11)
loss 1115.7816162109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 933.9420166015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 958.1959838867188
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 954.759521484375
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 4 19.0 909.046654676 (10.236272697871373, 11)
loss 382.14605712890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 931.6784057617188
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 1504.5047607421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 1598.472900390625
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 1 20.0 914.152581784 (10.390165524255663, 10)
loss 2059.89208984375
Current State,action,reward,Response time,Next State:  (10, 10.390165524255663) 3 19.0 928.797305964 (10.425974763084863, 11)
loss 385.5679016113281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 381.1040954589844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 1039.17236328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 1022.3502197265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 1009.5352783203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 1578.3284912109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 936.60693359375
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 2 19.0 951.466016946 (11.271571944085663, 11)
loss 387.26007080078125
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 0 21.0 963.716106332 (11.670334358779868, 9)
loss 1551.7850341796875
Current State,action,reward,Response time,Next State:  (9, 11.670334358779868) 3 20.0 1034.00195058 (11.819721938468785, 10)
loss 942.3737182617188
Current State,action,reward,Response time,Next State:  (10, 11.819721938468785) 3 19.0 1004.62682792 (12.19918626616789, 11)
loss 976.5967407226562
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 0 21.0 1012.73322757 (12.501496275411796, 9)
loss 957.1887817382812
Current State,action,reward,Response time,Next State:  (9, 12.501496275411796) 4 19.0 1077.50917513 (13.168618569876575, 11)
loss 1479.9979248046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 373.9274597167969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 930.4387817382812
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 1003.923828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 960.9719848632812
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 930.09033203125
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 0 21.0 1204.9600972 (16.466876895473597, 9)
loss 933.4456787109375
Current State,action,reward,Response time,Next State:  (9, 16.466876895473597) 3 20.0 1285.07728144 (16.871606159345866, 10)
loss 925.1854248046875
Current State,action,reward,Response time,Next State:  (10, 16.871606159345866) 3 19.0 1272.5994393 (17.534967586021782, 11)
loss 379.262939453125
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 2 19.0 1294.6873044 (17.669285735563751, 11)
loss 386.4761657714844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 390.74871826171875
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 4 19.0 1316.32685758 (18.385807405229915, 11)
loss 391.0556945800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 383.4148864746094
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 1 20.0 1354.73183582 (19.02839494033929, 10)
loss 977.8191528320312
Current State,action,reward,Response time,Next State:  (10, 19.02839494033929) 3 19.0 1387.00434183 (19.286321916040979, 11)
loss 433.63677978515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 1480.8912353515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 973.0556030273438
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 1017.0349731445312
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 1538.3721923828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 2120.2958984375
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 4 19.0 1383.93777195 (19.25591252280865, 11)
loss 915.8981323242188
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 2 19.0 1385.62570908 (19.08360399753829, 11)
loss 979.5442504882812
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 382.0860900878906
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 4 19.0 1354.56874896 (18.375894992990247, 11)
loss 386.5997009277344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 380.807861328125
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 1 20.0 1310.13203606 (17.229782241685768, 10)
loss 1058.710205078125
Action +2 not possible so Scaled up by 1
Current State,action,reward,Response time,Next State:  (10, 17.229782241685768) 4 19.0 1291.59856437 (16.84211602880065, 11)
loss 381.7568359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 474.40069580078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 387.55712890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 461.1387023925781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 948.5582275390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 1671.2554931640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 387.26953125
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 1 20.0 1200.39231205 (15.817158911312735, 10)
loss 965.089599609375
Current State,action,reward,Response time,Next State:  (10, 15.817158911312735) 3 19.0 1216.66724247 (15.829956988360925, 11)
loss 973.4583740234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 387.34063720703125
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 1 20.0 1207.88915169 (15.954793861767499, 10)
loss 389.66455078125
Action +2 not possible so Scaled up by 1
Current State,action,reward,Response time,Next State:  (10, 15.954793861767499) 4 19.0 1223.96796344 (16.004586266677634, 11)
loss 388.268798828125
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 4 19.0 1213.81868812 (16.017694914042416, 11)
loss 951.7926635742188
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 929.1764526367188
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 1 20.0 1210.80462626 (16.11465619633363, 10)
loss 927.561279296875
Current State,action,reward,Response time,Next State:  (10, 16.11465619633363) 1 21.0 1232.44771583 (16.147078378791146, 9)
loss 1492.6689453125
Current State,action,reward,Response time,Next State:  (9, 16.147078378791146) 0 23.0 1268.3374073 (16.229253414601111, 7)
loss 2189.69873046875
Current State,action,reward,Response time,Next State:  (7, 16.229253414601111) 3 22.0 1387.18316937 (16.295120821876548, 8)
loss 386.0957336425781
Current State,action,reward,Response time,Next State:  (8, 16.295120821876548) 1 23.0 1323.40847593 (16.667936385136993, 7)
loss 388.2460021972656
Current State,action,reward,Response time,Next State:  (7, 16.667936385136993) 3 22.0 1414.52045804 (16.836383524612351, 8)
loss 929.4719848632812
Current State,action,reward,Response time,Next State:  (8, 16.836383524612351) 3 21.0 1355.04246364 (16.845818065953559, 9)
loss 448.2068176269531
Current State,action,reward,Response time,Next State:  (9, 16.845818065953559) 3 20.0 1304.91298164 (17.052961248403161, 10)
loss 931.9932861328125
Current State,action,reward,Response time,Next State:  (10, 17.052961248403161) 3 19.0 1282.21925533 (17.215992726625572, 11)
loss 376.56390380859375
############ Running episode number: 99  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 2061.90673828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 384.7127380371094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 2226.4609375
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 2 19.0 982.049698353 (11.469111876584304, 11)
loss 961.1949462890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 391.92315673828125
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 2 19.0 967.160346038 (11.25610796929319, 11)
loss 944.3213500976562
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 919.646240234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 2187.030029296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 376.244384765625
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 1 20.0 945.729803224 (10.816918347608043, 10)
loss 938.888916015625
Current State,action,reward,Response time,Next State:  (10, 10.816918347608043) 3 19.0 951.434021987 (10.819208572963639, 11)
loss 390.89971923828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 971.0173950195312
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 391.720703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 1501.2076416015625
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 4 19.0 930.602776506 (10.58735855349979, 11)
loss 934.5252075195312
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 391.6305847167969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 387.25665283203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 1055.013916015625
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 2 19.0 922.369964659 (10.448897752470936, 11)
loss 2062.232666015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 380.91094970703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 382.4386291503906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 1507.759033203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 959.5824584960938
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 1039.6513671875
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 4 19.0 917.051082408 (10.344006106602812, 11)
loss 384.0805969238281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 957.7237548828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 973.7798461914062
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 1 20.0 912.494916918 (10.278181486298042, 10)
loss 987.2531127929688
Current State,action,reward,Response time,Next State:  (10, 10.278181486298042) 2 20.0 922.857214352 (10.268274366284802, 10)
loss 384.87786865234375
Current State,action,reward,Response time,Next State:  (10, 10.268274366284802) 3 19.0 922.331700166 (10.335411397720526, 11)
loss 936.6974487304688
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 943.9867553710938
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 939.24609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 383.7585754394531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 2096.7509765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 928.1492309570312
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 2 19.0 909.008683798 (10.369891240151098, 11)
loss 937.2753295898438
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 935.3836059570312
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 946.513916015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 1499.5408935546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 383.5440368652344
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 4 19.0 919.032945938 (10.546025383098053, 11)
loss 959.6592407226562
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 389.85321044921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 959.4229125976562
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 938.9183959960938
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 383.1802062988281
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 0 21.0 945.391786838 (11.039747673816453, 9)
loss 923.6640014648438
Current State,action,reward,Response time,Next State:  (9, 11.039747673816453) 3 20.0 1000.99384957 (11.271571944085663, 10)
loss 378.7077331542969
Current State,action,reward,Response time,Next State:  (10, 11.271571944085663) 3 19.0 975.550709187 (11.670334358779868, 11)
loss 379.6907043457031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 929.84521484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 943.1135864257812
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 383.1968078613281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 923.4402465820312
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 2 19.0 1063.96010023 (13.649658108197247, 11)
loss 372.7948913574219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 950.1743774414062
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 438.4628601074219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 1569.169921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 375.5416259765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 1022.572509765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 1531.66943359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 382.0035095214844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 974.1507568359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 1068.099365234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 381.4636535644531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 371.60699462890625
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 0 21.0 1354.73183582 (19.02839494033929, 9)
loss 931.0861206054688
Current State,action,reward,Response time,Next State:  (9, 19.02839494033929) 3 20.0 1419.16011 (19.286321916040979, 10)
loss 938.5594482421875
Current State,action,reward,Response time,Next State:  (10, 19.286321916040979) 1 21.0 1400.68584406 (19.340464848017284, 9)
loss 1523.8643798828125
Current State,action,reward,Response time,Next State:  (9, 19.340464848017284) 2 21.0 1435.4954296 (19.213467265587269, 9)
loss 956.0845336914062
Current State,action,reward,Response time,Next State:  (9, 19.213467265587269) 1 22.0 1428.84773289 (19.140765783401285, 8)
loss 924.0792236328125
Current State,action,reward,Response time,Next State:  (8, 19.140765783401285) 3 21.0 1489.72161235 (19.385636054792762, 9)
loss 385.77679443359375
Current State,action,reward,Response time,Next State:  (9, 19.385636054792762) 2 21.0 1437.85991935 (19.223969507401588, 9)
loss 379.4195251464844
Current State,action,reward,Response time,Next State:  (9, 19.223969507401588) 0 23.0 1429.39747342 (19.25591252280865, 7)
loss 387.7203063964844
Current State,action,reward,Response time,Next State:  (7, 19.25591252280865) 4 21.0 1575.79467084 (19.08360399753829, 9)
loss 381.7815246582031
Current State,action,reward,Response time,Next State:  (9, 19.08360399753829) 3 20.0 1422.05003169 (18.668181536495972, 10)
loss 509.9134521484375
Current State,action,reward,Response time,Next State:  (10, 18.668181536495972) 3 19.0 1367.89714889 (18.375894992990247, 11)
loss 384.3404541015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 922.966552734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 384.94732666015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 384.63616943359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 382.0624084472656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 946.8065185546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 386.9433898925781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 971.3433227539062
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 0 21.0 1189.84140354 (15.446694946204717, 9)
loss 2055.88330078125
Current State,action,reward,Response time,Next State:  (9, 15.446694946204717) 3 20.0 1231.67579099 (15.750501603468638, 10)
loss 1534.11474609375
Current State,action,reward,Response time,Next State:  (10, 15.750501603468638) 0 22.0 1213.1314661 (15.817158911312735, 8)
loss 1606.2847900390625
Current State,action,reward,Response time,Next State:  (8, 15.817158911312735) 3 21.0 1295.47409005 (15.829956988360925, 9)
loss 933.32421875
Current State,action,reward,Response time,Next State:  (9, 15.829956988360925) 3 20.0 1251.7376675 (15.892373986997768, 10)
loss 377.60125732421875
Current State,action,reward,Response time,Next State:  (10, 15.892373986997768) 3 19.0 1220.65695786 (15.954793861767499, 11)
loss 934.3279418945312
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 945.9683227539062
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 379.8761291503906
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 2 19.0 1214.51137704 (15.947547279389703, 11)
loss 951.3428344726562
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 932.5079956054688
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 462.646484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 982.7163696289062
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 939.7933349609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 935.8035888671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 974.8029174804688
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 1473.60693359375
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 4 19.0 1258.27117243 (17.052961248403161, 11)
loss 2088.3740234375
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 1 20.0 1269.21706044 (17.215992726625572, 10)
loss 930.0107421875
############ Running episode number: 100  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 2 20.0 1029.06659875 (11.973514343585284, 10)
loss 388.68408203125
Current State,action,reward,Response time,Next State:  (10, 11.973514343585284) 3 19.0 1012.7846064 (11.786394321941378, 11)
loss 2151.28173828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 1511.9854736328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 433.8177490234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 383.42388916015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 379.6493835449219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 1491.0283203125
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 1 20.0 950.798097136 (10.995673623987257, 10)
loss 951.626708984375
Current State,action,reward,Response time,Next State:  (10, 10.995673623987257) 1 21.0 960.915933313 (10.931193889570471, 9)
loss 460.3125305175781
Current State,action,reward,Response time,Next State:  (9, 10.931193889570471) 0 23.0 995.311594677 (10.816918347608043, 7)
loss 933.7755126953125
Current State,action,reward,Response time,Next State:  (7, 10.816918347608043) 4 21.0 1049.90414092 (10.819208572963639, 9)
loss 1562.3753662109375
Current State,action,reward,Response time,Next State:  (9, 10.819208572963639) 3 20.0 989.449716 (10.768325938188134, 10)
loss 386.5023498535156
Current State,action,reward,Response time,Next State:  (10, 10.768325938188134) 3 19.0 948.856481751 (10.772009508959538, 11)
loss 1487.24853515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 966.6549072265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 378.72625732421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 1604.04296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 384.1427917480469
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 1 20.0 925.789969445 (10.489125480251131, 10)
loss 1490.496826171875
Current State,action,reward,Response time,Next State:  (10, 10.489125480251131) 3 19.0 934.046546974 (10.448897752470936, 11)
loss 382.8492736816406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 1488.4049072265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 380.66351318359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 381.6818542480469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 381.6369323730469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 932.0122680664062
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 379.39080810546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 1523.08349609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 384.3328552246094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 948.7862548828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 456.2731628417969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 386.8028564453125
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 2 19.0 914.247384359 (10.305649118067803, 11)
loss 380.03753662109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 988.2105712890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 434.4559020996094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 2076.60986328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 381.5824279785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 459.9555969238281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 436.0881652832031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 935.45556640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 386.1474914550781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 927.5239868164062
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 4 19.0 919.032945938 (10.546025383098053, 11)
loss 1572.1502685546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 1525.0723876953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 437.20068359375
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 4 19.0 929.522052234 (10.771376986314287, 11)
loss 439.7973937988281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 384.1402282714844
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 2 19.0 945.391786838 (11.039747673816453, 11)
loss 386.7528381347656
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 4 19.0 951.466016946 (11.271571944085663, 11)
loss 1502.1680908203125
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 2 19.0 963.716106332 (11.670334358779868, 11)
loss 1615.7637939453125
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 2 19.0 984.787563682 (11.819721938468785, 11)
loss 429.5570983886719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 1032.4476318359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 1608.0205078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 949.7747802734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 382.71612548828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 974.1259155273438
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 1 20.0 1122.88439768 (14.677479537099185, 10)
loss 376.5286865234375
Current State,action,reward,Response time,Next State:  (10, 14.677479537099185) 3 19.0 1156.21398489 (15.353965082180355, 11)
loss 382.6888427734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 380.2091369628906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 382.3492126464844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 2151.236083984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 928.8885498046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 967.0009765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 950.1116333007812
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 981.272705078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 1512.4649658203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 973.1685791015625
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 0 21.0 1373.60319427 (19.286321916040979, 9)
loss 975.5579223632812
Current State,action,reward,Response time,Next State:  (9, 19.286321916040979) 3 20.0 1432.66131431 (19.340464848017284, 10)
loss 436.2234802246094
Current State,action,reward,Response time,Next State:  (10, 19.340464848017284) 3 19.0 1403.55780672 (19.213467265587269, 11)
loss 373.8065490722656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 1537.2720947265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 382.5546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 984.2764892578125
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 4 19.0 1383.93777195 (19.25591252280865, 11)
loss 932.395263671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 383.7174987792969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 373.93994140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 382.4548034667969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 954.8964233398438
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 386.3887634277344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 1561.370849609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 1037.7523193359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 375.295166015625
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 4 19.0 1210.97093797 (15.828704162850809, 11)
loss 383.57696533203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 378.3575439453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 382.7538146972656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 1555.3739013671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 377.4716491699219
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 0 21.0 1203.91462651 (15.829956988360925, 9)
loss 393.00244140625
Current State,action,reward,Response time,Next State:  (9, 15.829956988360925) 2 21.0 1251.7376675 (15.892373986997768, 9)
loss 390.1253662109375
Current State,action,reward,Response time,Next State:  (9, 15.892373986997768) 3 20.0 1255.00488935 (15.954793861767499, 10)
loss 432.7759094238281
Current State,action,reward,Response time,Next State:  (10, 15.954793861767499) 3 19.0 1223.96796344 (16.004586266677634, 11)
loss 973.4039916992188
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 378.7499084472656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 1477.8779296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 391.8818359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 435.1221618652344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 378.41436767578125
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 1 20.0 1225.69057988 (16.295120821876548, 10)
loss 377.6402282714844
Current State,action,reward,Response time,Next State:  (10, 16.295120821876548) 3 19.0 1242.02029803 (16.667936385136993, 11)
loss 962.4994506835938
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 381.26800537109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 936.9290161132812
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 1 20.0 1258.27117243 (17.052961248403161, 10)
loss 958.1066284179688
Action +2 not possible so Scaled up by 1
Current State,action,reward,Response time,Next State:  (10, 17.052961248403161) 4 19.0 1282.21925533 (17.215992726625572, 11)
loss 384.60186767578125
############ Running episode number: 101  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 1042.5391845703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 378.4248962402344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 377.86834716796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 941.3532104492188
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 377.8607482910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 950.3561401367188
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 4 19.0 962.898956888 (11.027107764209074, 11)
loss 1493.7447509765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 379.7568054199219
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 2 19.0 949.137050055 (10.931193889570471, 11)
loss 432.7463684082031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 381.25091552734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 390.65557861328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 382.67059326171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 439.075439453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 926.9381103515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 384.8934326171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 983.0828247070312
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 2122.733154296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 386.0732727050781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 1565.971435546875
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 2 19.0 920.244245637 (10.433149880183072, 11)
loss 974.7632446289062
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 380.4004211425781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 1555.9940185546875
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 4 19.0 916.124940439 (10.42733414151318, 11)
loss 925.4536743164062
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 4 19.0 919.104778396 (10.388469398680568, 11)
loss 967.4649658203125
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 1 20.0 917.051082408 (10.344006106602812, 10)
loss 925.1051635742188
Current State,action,reward,Response time,Next State:  (10, 10.344006106602812) 3 19.0 926.348821567 (10.319026962956018, 11)
loss 973.4375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 371.93865966796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 379.9324951171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 1566.6229248046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 938.5784301757812
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 459.24017333984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 979.8942260742188
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 379.5521545410156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 377.56988525390625
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 4 19.0 909.046654676 (10.236272697871373, 11)
loss 368.19366455078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 958.3720703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 379.7287902832031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 931.808349609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 383.8847351074219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 383.9206848144531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 1007.7938232421875
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 0 21.0 925.376677007 (10.655373370049301, 9)
loss 948.7892456054688
Current State,action,reward,Response time,Next State:  (9, 10.655373370049301) 1 22.0 980.873751654 (10.624473674922116, 8)
loss 926.4904174804688
Current State,action,reward,Response time,Next State:  (8, 10.624473674922116) 3 21.0 991.988665914 (10.771376986314287, 9)
loss 1515.5936279296875
Current State,action,reward,Response time,Next State:  (9, 10.771376986314287) 3 20.0 986.945968488 (10.924797168745895, 10)
loss 951.557861328125
Current State,action,reward,Response time,Next State:  (10, 10.924797168745895) 3 19.0 957.1563561 (11.039747673816453, 11)
loss 373.8003845214844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 454.11041259765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 937.9228515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 373.41162109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 983.7607421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 386.6760559082031
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 0 21.0 1028.70793389 (13.168618569876575, 9)
loss 437.23876953125
Current State,action,reward,Response time,Next State:  (9, 13.168618569876575) 3 20.0 1112.429735 (13.649658108197247, 10)
loss 1566.025634765625
Current State,action,reward,Response time,Next State:  (10, 13.649658108197247) 3 19.0 1101.69413046 (14.283719188889453, 11)
loss 2037.22509765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 931.652587890625
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 2 19.0 1143.69153516 (15.353965082180355, 11)
loss 1520.9605712890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 934.51171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 388.30389404296875
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 4 19.0 1238.24711194 (16.871606159345866, 11)
loss 973.9150390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 433.8941955566406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 1523.32958984375
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 0 21.0 1301.78496219 (17.944480812078613, 9)
loss 1497.41357421875
Current State,action,reward,Response time,Next State:  (9, 17.944480812078613) 3 20.0 1362.4225545 (18.385807405229915, 10)
loss 382.43072509765625
Current State,action,reward,Response time,Next State:  (10, 18.385807405229915) 1 21.0 1352.9188695 (18.671267839956315, 9)
loss 376.5047607421875
Current State,action,reward,Response time,Next State:  (9, 18.671267839956315) 3 20.0 1400.46626871 (19.02839494033929, 10)
loss 379.1798095703125
Current State,action,reward,Response time,Next State:  (10, 19.02839494033929) 3 19.0 1387.00434183 (19.286321916040979, 11)
loss 1493.357177734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 950.994384765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 937.769775390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 2024.6060791015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 374.85577392578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 2091.9794921875
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 4 19.0 1383.93777195 (19.25591252280865, 11)
loss 373.6640930175781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 437.504150390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 1563.406494140625
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 1 20.0 1354.56874896 (18.375894992990247, 10)
loss 918.8482666015625
Current State,action,reward,Response time,Next State:  (10, 18.375894992990247) 3 19.0 1352.39307459 (17.82724819986867, 11)
loss 957.0632934570312
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 386.7236328125
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 2 19.0 1278.56065924 (16.84211602880065, 11)
loss 2073.957763671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 386.59564208984375
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 4 19.0 1226.10492247 (15.950694610794756, 11)
loss 388.3741149902344
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 2 19.0 1210.97093797 (15.828704162850809, 11)
loss 380.0511169433594
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 0 21.0 1204.52470225 (15.550833128512703, 9)
loss 2099.00537109375
Current State,action,reward,Response time,Next State:  (9, 15.550833128512703) 3 20.0 1237.12691092 (15.446694946204717, 10)
loss 2025.311279296875
Current State,action,reward,Response time,Next State:  (10, 15.446694946204717) 1 21.0 1197.01631782 (15.750501603468638, 9)
loss 1008.5994873046875
Current State,action,reward,Response time,Next State:  (9, 15.750501603468638) 3 20.0 1247.57857022 (15.817158911312735, 10)
loss 975.7348022460938
Current State,action,reward,Response time,Next State:  (10, 15.817158911312735) 3 19.0 1216.66724247 (15.829956988360925, 11)
loss 1499.8863525390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 436.6486511230469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 381.0941467285156
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 4 19.0 1211.18755114 (16.004586266677634, 11)
loss 378.4700622558594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 926.0344848632812
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 4 19.0 1214.51137704 (15.947547279389703, 11)
loss 1493.3748779296875
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 1 20.0 1210.80462626 (16.11465619633363, 10)
loss 384.4058837890625
Current State,action,reward,Response time,Next State:  (10, 16.11465619633363) 3 19.0 1232.44771583 (16.147078378791146, 11)
loss 430.593994140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 516.3056640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 924.04736328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 384.7174072265625
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 0 21.0 1248.87152463 (16.836383524612351, 9)
loss 382.685302734375
Current State,action,reward,Response time,Next State:  (9, 16.836383524612351) 4 19.0 1304.41912996 (16.845818065953559, 11)
loss 1496.586669921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 374.7734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 933.90478515625
############ Running episode number: 102  ##############
Action +2 not possible so Scaled up by 1
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 4 19.0 1029.06659875 (11.973514343585284, 11)
loss 924.1849365234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 379.39404296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 933.2669677734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 1511.33837890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 2073.46142578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 1511.33349609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 383.1864929199219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 937.2623901367188
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 2125.05615234375
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 2 19.0 945.729803224 (10.816918347608043, 11)
loss 938.6551513671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 1488.798828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 378.8704833984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 1579.84765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 375.1329345703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 374.9869689941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 374.25927734375
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 2 19.0 925.738299342 (10.553846649940214, 11)
loss 433.6837158203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 1520.6219482421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 928.767333984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 1485.1783447265625
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 1 20.0 919.412094444 (10.44185150623065, 10)
loss 372.54693603515625
Current State,action,reward,Response time,Next State:  (10, 10.44185150623065) 3 19.0 931.538941947 (10.370942817486826, 11)
loss 457.9924621582031
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 1 20.0 916.124940439 (10.42733414151318, 10)
loss 378.3530578613281
Action +2 not possible so Scaled up by 1
Current State,action,reward,Response time,Next State:  (10, 10.42733414151318) 4 19.0 930.768881517 (10.388469398680568, 11)
loss 370.34344482421875
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 2 19.0 917.051082408 (10.344006106602812, 11)
loss 373.7780456542969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 455.111572265625
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 2 19.0 913.381595845 (10.30224719189987, 11)
loss 1474.721435546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 979.1708374023438
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 1564.739990234375
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 4 19.0 910.69972028 (10.335411397720526, 11)
loss 375.3184509277344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 920.581787109375
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 2 19.0 912.67468196 (10.24826025489064, 11)
loss 427.6352233886719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 455.2898864746094
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 0 21.0 911.133954163 (10.236991269871366, 9)
loss 380.5964050292969
Current State,action,reward,Response time,Next State:  (9, 10.236991269871366) 3 20.0 958.973513426 (10.236272697871373, 10)
loss 1466.0682373046875
Current State,action,reward,Response time,Next State:  (10, 10.236272697871373) 3 19.0 920.634200723 (10.369891240151098, 11)
loss 450.5575256347656
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 0 21.0 916.069372847 (10.316955310454549, 9)
loss 383.2530822753906
Current State,action,reward,Response time,Next State:  (9, 10.316955310454549) 3 20.0 963.159236328 (10.333617326102203, 10)
loss 1042.3558349609375
Current State,action,reward,Response time,Next State:  (10, 10.333617326102203) 3 19.0 925.797758139 (10.390165524255663, 11)
loss 922.1930541992188
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 4 19.0 917.140709305 (10.425974763084863, 11)
loss 975.4201049804688
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 0 21.0 919.032945938 (10.546025383098053, 9)
loss 369.7268371582031
Current State,action,reward,Response time,Next State:  (9, 10.546025383098053) 4 19.0 975.14992417 (10.655373370049301, 11)
loss 1533.3568115234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 377.7167053222656
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 2 19.0 929.522052234 (10.771376986314287, 11)
loss 371.15643310546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 972.0169677734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 373.6239929199219
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 4 19.0 951.466016946 (11.271571944085663, 11)
loss 1481.325439453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 926.7554321289062
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 372.8545837402344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 955.4580078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 947.978759765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 371.47760009765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 964.449951171875
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 1 20.0 1089.37925646 (14.283719188889453, 10)
loss 374.2210388183594
Current State,action,reward,Response time,Next State:  (10, 14.283719188889453) 0 22.0 1135.32732476 (14.677479537099185, 8)
loss 376.3648376464844
Current State,action,reward,Response time,Next State:  (8, 14.677479537099185) 4 20.0 1228.86576266 (15.353965082180355, 10)
loss 966.0475463867188
Current State,action,reward,Response time,Next State:  (10, 15.353965082180355) 3 19.0 1192.09754638 (15.836943704090487, 11)
loss 378.2142028808594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 928.5345458984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 1002.3152465820312
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 1 20.0 1259.63387034 (17.534967586021782, 10)
loss 373.7132873535156
Current State,action,reward,Response time,Next State:  (10, 17.534967586021782) 3 19.0 1307.78684385 (17.669285735563751, 11)
loss 431.4452209472656
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 2 19.0 1301.78496219 (17.944480812078613, 11)
loss 1498.7979736328125
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 4 19.0 1316.32685758 (18.385807405229915, 11)
loss 374.95367431640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 381.0447692871094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 1000.42138671875
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 2 19.0 1373.60319427 (19.286321916040979, 11)
loss 1007.7887573242188
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 928.6729125976562
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 948.1427612304688
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 378.4205627441406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 1006.16064453125
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 1 20.0 1392.48057747 (19.223969507401588, 10)
loss 379.7620544433594
Current State,action,reward,Response time,Next State:  (10, 19.223969507401588) 3 19.0 1397.37841716 (19.25591252280865, 11)
loss 923.8553466796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 378.4307556152344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 378.0612487792969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 929.5972290039062
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 373.9413757324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 435.5231628417969
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 1 20.0 1278.56065924 (16.84211602880065, 10)
loss 372.0414123535156
Current State,action,reward,Response time,Next State:  (10, 16.84211602880065) 3 19.0 1271.03516211 (16.237094554670044, 11)
loss 376.3197937011719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 959.6002807617188
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 1 20.0 1210.97093797 (15.828704162850809, 10)
loss 921.6856079101562
Current State,action,reward,Response time,Next State:  (10, 15.828704162850809) 3 19.0 1217.27964986 (15.550833128512703, 11)
loss 375.16357421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 1494.7945556640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 374.38714599609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 368.8858642578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 971.856689453125
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 1 20.0 1204.59090422 (15.892373986997768, 10)
loss 1474.04931640625
Current State,action,reward,Response time,Next State:  (10, 15.892373986997768) 3 19.0 1220.65695786 (15.954793861767499, 11)
loss 1476.9017333984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 456.2123718261719
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 0 21.0 1213.81868812 (16.017694914042416, 9)
loss 952.8938598632812
Current State,action,reward,Response time,Next State:  (9, 16.017694914042416) 3 20.0 1261.56482143 (15.947547279389703, 10)
loss 1544.2965087890625
Current State,action,reward,Response time,Next State:  (10, 15.947547279389703) 3 19.0 1223.58357506 (16.11465619633363, 11)
loss 962.5382080078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 372.2655334472656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 377.392333984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 456.2228088378906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 369.0608215332031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 372.0199890136719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 370.0614013671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 376.59326171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 378.140380859375
############ Running episode number: 103  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 384.21954345703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 375.6939697265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 377.84906005859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 382.32562255859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 372.278076171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 371.828125
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 2 19.0 962.898956888 (11.027107764209074, 11)
loss 378.0028076171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 373.85107421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 372.8403015136719
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 2 19.0 945.729803224 (10.816918347608043, 11)
loss 382.34075927734375
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 1 20.0 939.691239608 (10.819208572963639, 10)
loss 374.3070068359375
Current State,action,reward,Response time,Next State:  (10, 10.819208572963639) 3 19.0 951.555504911 (10.768325938188134, 11)
loss 376.4966735839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 368.9097595214844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 373.2717590332031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 367.4564514160156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 378.0517272949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 372.1390686035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 383.9248352050781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 375.76287841796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 369.96014404296875
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 1 20.0 919.412094444 (10.44185150623065, 10)
loss 370.47540283203125
Action +2 not possible so Scaled up by 1
Current State,action,reward,Response time,Next State:  (10, 10.44185150623065) 4 19.0 931.538941947 (10.370942817486826, 11)
loss 365.7290954589844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 370.9696960449219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 367.09698486328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 377.3505859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 372.7298889160156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 370.03729248046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 373.537353515625
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 1 20.0 911.223233653 (10.268274366284802, 10)
loss 375.4735107421875
Current State,action,reward,Response time,Next State:  (10, 10.268274366284802) 0 22.0 922.331700166 (10.335411397720526, 8)
loss 373.2583923339844
Current State,action,reward,Response time,Next State:  (8, 10.335411397720526) 3 21.0 975.09448038 (10.305649118067803, 9)
loss 367.85076904296875
Current State,action,reward,Response time,Next State:  (9, 10.305649118067803) 3 20.0 962.567412952 (10.24826025489064, 10)
loss 372.140625
Current State,action,reward,Response time,Next State:  (10, 10.24826025489064) 3 19.0 921.2700698 (10.276491935146446, 11)
loss 376.03863525390625
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 2 19.0 911.133954163 (10.236991269871366, 11)
loss 373.47381591796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 372.65045166015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 374.9743347167969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 373.9209899902344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 379.6069030761719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 367.4200744628906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 368.85894775390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 373.123779296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 370.054931640625
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 1 20.0 931.154858096 (10.624473674922116, 10)
loss 375.0386962890625
Current State,action,reward,Response time,Next State:  (10, 10.624473674922116) 3 19.0 941.225969064 (10.771376986314287, 11)
loss 374.5764465332031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 375.51727294921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 378.88555908203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 375.41424560546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 374.97894287109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 375.62701416015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 368.7098693847656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 381.0490417480469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 375.3139953613281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 368.86956787109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 367.13983154296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 376.45318603515625
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 2 19.0 1143.69153516 (15.353965082180355, 11)
loss 376.6364440917969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 373.36651611328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 380.5246887207031
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 2 19.0 1238.24711194 (16.871606159345866, 11)
loss 373.57415771484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 375.0154724121094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 372.826171875
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 2 19.0 1301.78496219 (17.944480812078613, 11)
loss 367.65228271484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 369.40576171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 372.21929931640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 377.2485656738281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 371.79803466796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 370.2832946777344
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 2 19.0 1390.09363446 (19.213467265587269, 11)
loss 363.2671203613281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 367.6782531738281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 371.51739501953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 371.1390686035156
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 0 21.0 1383.93777195 (19.25591252280865, 9)
loss 371.62567138671875
Current State,action,reward,Response time,Next State:  (9, 19.25591252280865) 3 20.0 1431.06953264 (19.08360399753829, 10)
loss 369.79180908203125
Current State,action,reward,Response time,Next State:  (10, 19.08360399753829) 3 19.0 1389.93285614 (18.668181536495972, 11)
loss 366.4889221191406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 378.9611511230469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 371.2197265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 372.1831970214844
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 0 21.0 1278.56065924 (16.84211602880065, 9)
loss 375.20361328125
Current State,action,reward,Response time,Next State:  (9, 16.84211602880065) 3 20.0 1304.71919827 (16.237094554670044, 10)
loss 374.12542724609375
Current State,action,reward,Response time,Next State:  (10, 16.237094554670044) 1 21.0 1238.94234737 (15.950694610794756, 9)
loss 373.2908630371094
Current State,action,reward,Response time,Next State:  (9, 15.950694610794756) 2 21.0 1258.0576862 (15.828704162850809, 9)
loss 370.37841796875
Current State,action,reward,Response time,Next State:  (9, 15.828704162850809) 1 22.0 1251.67208827 (15.550833128512703, 8)
loss 368.3331298828125
Current State,action,reward,Response time,Next State:  (8, 15.550833128512703) 3 21.0 1279.90873428 (15.446694946204717, 9)
loss 371.4161682128906
Current State,action,reward,Response time,Next State:  (9, 15.446694946204717) 3 20.0 1231.67579099 (15.750501603468638, 10)
loss 370.65484619140625
Current State,action,reward,Response time,Next State:  (10, 15.750501603468638) 1 21.0 1213.1314661 (15.817158911312735, 9)
loss 371.5104675292969
Current State,action,reward,Response time,Next State:  (9, 15.817158911312735) 2 21.0 1251.06775133 (15.829956988360925, 9)
loss 370.733154296875
Current State,action,reward,Response time,Next State:  (9, 15.829956988360925) 3 20.0 1251.7376675 (15.892373986997768, 10)
loss 370.7532043457031
Current State,action,reward,Response time,Next State:  (10, 15.892373986997768) 1 21.0 1220.65695786 (15.954793861767499, 9)
loss 381.34075927734375
Current State,action,reward,Response time,Next State:  (9, 15.954793861767499) 3 20.0 1258.27226176 (16.004586266677634, 10)
loss 369.6357116699219
Current State,action,reward,Response time,Next State:  (10, 16.004586266677634) 3 19.0 1226.60915635 (16.017694914042416, 11)
loss 375.591552734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 372.7857971191406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 373.1839904785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 371.0582580566406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 376.1191711425781
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 0 21.0 1225.69057988 (16.295120821876548, 9)
loss 371.3573913574219
Current State,action,reward,Response time,Next State:  (9, 16.295120821876548) 3 20.0 1276.0866986 (16.667936385136993, 10)
loss 374.8260192871094
Current State,action,reward,Response time,Next State:  (10, 16.667936385136993) 3 19.0 1261.79596106 (16.836383524612351, 11)
loss 370.5614929199219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 372.9669494628906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 369.0306701660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 370.80706787109375
############ Running episode number: 104  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 375.3891906738281
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 4 19.0 1000.80824137 (11.786394321941378, 11)
loss 373.5972595214844
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 4 19.0 990.920419923 (11.61852219546234, 11)
loss 367.9248962402344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 374.9818420410156
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 0 21.0 974.154538113 (11.336751742492702, 9)
loss 371.98095703125
Current State,action,reward,Response time,Next State:  (9, 11.336751742492702) 3 20.0 1016.54054685 (11.25610796929319, 10)
loss 372.4400634765625
Current State,action,reward,Response time,Next State:  (10, 11.25610796929319) 3 19.0 974.730436685 (11.027107764209074, 11)
loss 377.3363952636719
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 2 19.0 950.798097136 (10.995673623987257, 11)
loss 371.4623718261719
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 2 19.0 949.137050055 (10.931193889570471, 11)
loss 374.62640380859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 375.6512756347656
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 2 19.0 939.691239608 (10.819208572963639, 11)
loss 366.7489013671875
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 1 20.0 939.812260006 (10.768325938188134, 10)
loss 375.8398742675781
Current State,action,reward,Response time,Next State:  (10, 10.768325938188134) 3 19.0 948.856481751 (10.772009508959538, 11)
loss 367.5153503417969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 374.7849426269531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 374.82330322265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 377.04632568359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 370.1746826171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 370.892578125
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 0 21.0 922.369964659 (10.448897752470936, 9)
loss 377.1811218261719
Current State,action,reward,Response time,Next State:  (9, 10.448897752470936) 3 20.0 970.065772031 (10.433149880183072, 10)
loss 374.6803283691406
Current State,action,reward,Response time,Next State:  (10, 10.433149880183072) 3 19.0 931.077372094 (10.44185150623065, 11)
loss 372.5986022949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 374.4498291015625
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 4 19.0 916.124940439 (10.42733414151318, 11)
loss 382.0484313964844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 370.715087890625
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 4 19.0 917.051082408 (10.344006106602812, 11)
loss 363.9126892089844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 370.801025390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 374.99896240234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 366.732666015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 370.3929138183594
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 4 19.0 910.69972028 (10.335411397720526, 11)
loss 374.9884338378906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 368.76397705078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 379.8078308105469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 379.1087341308594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 370.8285217285156
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 4 19.0 909.046654676 (10.236272697871373, 11)
loss 376.3443908691406
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 1 20.0 909.008683798 (10.369891240151098, 10)
loss 368.395751953125
Current State,action,reward,Response time,Next State:  (10, 10.369891240151098) 3 19.0 927.721874973 (10.316955310454549, 11)
loss 371.91424560546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 369.4160461425781
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 0 21.0 914.152581784 (10.390165524255663, 9)
loss 376.7567138671875
Current State,action,reward,Response time,Next State:  (9, 10.390165524255663) 1 22.0 966.991429728 (10.425974763084863, 8)
loss 373.71942138671875
Current State,action,reward,Response time,Next State:  (8, 10.425974763084863) 3 21.0 980.387437704 (10.546025383098053, 9)
loss 368.7807922363281
Current State,action,reward,Response time,Next State:  (9, 10.546025383098053) 3 20.0 975.14992417 (10.655373370049301, 10)
loss 371.8143310546875
Current State,action,reward,Response time,Next State:  (10, 10.655373370049301) 3 19.0 942.865015335 (10.624473674922116, 11)
loss 374.2355041503906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 375.6329345703125
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 1 20.0 937.284736847 (10.924797168745895, 10)
loss 369.4075012207031
Current State,action,reward,Response time,Next State:  (10, 10.924797168745895) 3 19.0 957.1563561 (11.039747673816453, 11)
loss 374.1968994140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 369.96221923828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 367.7333984375
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 4 19.0 984.787563682 (11.819721938468785, 11)
loss 368.46185302734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 370.5218505859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 372.8756103515625
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 1 20.0 1028.70793389 (13.168618569876575, 10)
loss 371.08892822265625
Current State,action,reward,Response time,Next State:  (10, 13.168618569876575) 3 19.0 1076.17782493 (13.649658108197247, 11)
loss 371.18634033203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 367.1405029296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 372.9121398925781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 368.805908203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 364.8694152832031
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 2 19.0 1204.9600972 (16.466876895473597, 11)
loss 372.3767395019531
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 1 20.0 1238.24711194 (16.871606159345866, 10)
loss 371.28875732421875
Action +2 not possible so Scaled up by 1
Current State,action,reward,Response time,Next State:  (10, 16.871606159345866) 4 19.0 1272.5994393 (17.534967586021782, 11)
loss 371.8874206542969
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 0 21.0 1294.6873044 (17.669285735563751, 9)
loss 365.7134094238281
Current State,action,reward,Response time,Next State:  (9, 17.669285735563751) 3 20.0 1348.01745033 (17.944480812078613, 10)
loss 373.6487121582031
Action +2 not possible so Scaled up by 1
Current State,action,reward,Response time,Next State:  (10, 17.944480812078613) 4 19.0 1329.50910109 (18.385807405229915, 11)
loss 374.2937316894531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 369.2210998535156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 370.8448486328125
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 2 19.0 1373.60319427 (19.286321916040979, 11)
loss 373.3208923339844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 370.0500183105469
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 0 21.0 1390.09363446 (19.213467265587269, 9)
loss 372.17657470703125
Current State,action,reward,Response time,Next State:  (9, 19.213467265587269) 3 20.0 1428.84773289 (19.140765783401285, 10)
loss 375.2109680175781
Current State,action,reward,Response time,Next State:  (10, 19.140765783401285) 3 19.0 1392.96495117 (19.385636054792762, 11)
loss 374.8995666503906
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 2 19.0 1392.48057747 (19.223969507401588, 11)
loss 372.95648193359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 366.8984069824219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 369.95452880859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 372.9245300292969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 372.10906982421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 372.02398681640625
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 0 21.0 1310.13203606 (17.229782241685768, 9)
loss 371.9272155761719
Current State,action,reward,Response time,Next State:  (9, 17.229782241685768) 3 20.0 1325.01161138 (16.84211602880065, 10)
loss 365.6454162597656
Current State,action,reward,Response time,Next State:  (10, 16.84211602880065) 0 22.0 1271.03516211 (16.237094554670044, 8)
loss 368.3525390625
Current State,action,reward,Response time,Next State:  (8, 16.237094554670044) 3 21.0 1320.01714264 (15.950694610794756, 9)
loss 371.0104064941406
Current State,action,reward,Response time,Next State:  (9, 15.950694610794756) 3 20.0 1258.0576862 (15.828704162850809, 10)
loss 372.7784729003906
Current State,action,reward,Response time,Next State:  (10, 15.828704162850809) 3 19.0 1217.27964986 (15.550833128512703, 11)
loss 368.659912109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 370.8669738769531
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 2 19.0 1184.33851965 (15.750501603468638, 11)
loss 371.2831115722656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 371.7423095703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 373.5349426269531
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 4 19.0 1204.59090422 (15.892373986997768, 11)
loss 369.17156982421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 372.0755615234375
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 2 19.0 1211.18755114 (16.004586266677634, 11)
loss 370.8230285644531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 370.37689208984375
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 2 19.0 1214.51137704 (15.947547279389703, 11)
loss 369.6654357910156
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 2 19.0 1210.80462626 (16.11465619633363, 11)
loss 369.8095397949219
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 1 20.0 1219.63501821 (16.147078378791146, 10)
loss 367.1302185058594
Current State,action,reward,Response time,Next State:  (10, 16.147078378791146) 3 19.0 1234.16752106 (16.229253414601111, 11)
loss 367.9084167480469
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 0 21.0 1225.69057988 (16.295120821876548, 9)
loss 378.2654113769531
Current State,action,reward,Response time,Next State:  (9, 16.295120821876548) 0 23.0 1276.0866986 (16.667936385136993, 7)
loss 372.5495910644531
Current State,action,reward,Response time,Next State:  (7, 16.667936385136993) 3 22.0 1414.52045804 (16.836383524612351, 8)
loss 372.1973876953125
Current State,action,reward,Response time,Next State:  (8, 16.836383524612351) 3 21.0 1355.04246364 (16.845818065953559, 9)
loss 371.9721374511719
Current State,action,reward,Response time,Next State:  (9, 16.845818065953559) 3 20.0 1304.91298164 (17.052961248403161, 10)
loss 375.71014404296875
Current State,action,reward,Response time,Next State:  (10, 17.052961248403161) 3 19.0 1282.21925533 (17.215992726625572, 11)
loss 367.2517395019531
############ Running episode number: 105  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 364.9506530761719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 370.07916259765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 373.3277282714844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 372.4844055175781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 366.30462646484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 373.3648986816406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 374.9307861328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 370.98712158203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 381.7769775390625
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 2 19.0 945.729803224 (10.816918347608043, 11)
loss 375.5166931152344
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 4 19.0 939.691239608 (10.819208572963639, 11)
loss 368.5526123046875
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 2 19.0 939.812260006 (10.768325938188134, 11)
loss 371.0427551269531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 369.5935363769531
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 1 20.0 937.318160694 (10.644925616761762, 10)
loss 372.3564453125
Current State,action,reward,Response time,Next State:  (10, 10.644925616761762) 2 20.0 942.310823749 (10.58735855349979, 10)
loss 368.7374267578125
Current State,action,reward,Response time,Next State:  (10, 10.58735855349979) 3 19.0 939.257231149 (10.552868829802469, 11)
loss 371.4213562011719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 370.2216796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 371.1206359863281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 372.930908203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 371.4817199707031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 375.66998291015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 369.879150390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 373.0343017578125
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 1 20.0 919.104778396 (10.388469398680568, 10)
loss 373.6592712402344
Current State,action,reward,Response time,Next State:  (10, 10.388469398680568) 1 21.0 928.707336523 (10.344006106602812, 9)
loss 374.272705078125
Current State,action,reward,Response time,Next State:  (9, 10.344006106602812) 3 20.0 964.575212011 (10.319026962956018, 10)
loss 367.93927001953125
Current State,action,reward,Response time,Next State:  (10, 10.319026962956018) 3 19.0 925.023825574 (10.30224719189987, 11)
loss 370.2945556640625
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 0 21.0 912.494916918 (10.278181486298042, 9)
loss 371.10723876953125
Current State,action,reward,Response time,Next State:  (9, 10.278181486298042) 3 20.0 961.129617982 (10.268274366284802, 10)
loss 373.91064453125
Current State,action,reward,Response time,Next State:  (10, 10.268274366284802) 2 20.0 922.331700166 (10.335411397720526, 10)
loss 375.9160461425781
Current State,action,reward,Response time,Next State:  (10, 10.335411397720526) 3 19.0 925.892923039 (10.305649118067803, 11)
loss 382.81976318359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 370.0972595214844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 367.5093688964844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 369.7140197753906
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 2 19.0 909.046654676 (10.236272697871373, 11)
loss 370.01812744140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 367.3417053222656
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 0 21.0 916.069372847 (10.316955310454549, 9)
loss 368.20758056640625
Current State,action,reward,Response time,Next State:  (9, 10.316955310454549) 3 20.0 963.159236328 (10.333617326102203, 10)
loss 370.2145080566406
Current State,action,reward,Response time,Next State:  (10, 10.333617326102203) 1 21.0 925.797758139 (10.390165524255663, 9)
loss 370.44232177734375
Current State,action,reward,Response time,Next State:  (9, 10.390165524255663) 2 21.0 966.991429728 (10.425974763084863, 9)
loss 376.2530517578125
Current State,action,reward,Response time,Next State:  (9, 10.425974763084863) 0 23.0 968.865866662 (10.546025383098053, 7)
loss 374.4511413574219
Current State,action,reward,Response time,Next State:  (7, 10.546025383098053) 3 22.0 1033.02297692 (10.655373370049301, 8)
loss 373.74786376953125
Current State,action,reward,Response time,Next State:  (8, 10.655373370049301) 1 23.0 993.794592261 (10.624473674922116, 7)
loss 373.3215637207031
Current State,action,reward,Response time,Next State:  (7, 10.624473674922116) 3 22.0 1037.91161802 (10.771376986314287, 8)
loss 370.6589050292969
Current State,action,reward,Response time,Next State:  (8, 10.771376986314287) 1 23.0 1000.57439983 (10.924797168745895, 7)
loss 373.55712890625
Current State,action,reward,Response time,Next State:  (7, 10.924797168745895) 3 22.0 1056.62679639 (11.039747673816453, 8)
loss 369.8260803222656
Current State,action,reward,Response time,Next State:  (8, 11.039747673816453) 3 21.0 1016.25926965 (11.271571944085663, 9)
loss 375.0254821777344
Current State,action,reward,Response time,Next State:  (9, 11.271571944085663) 0 23.0 1013.12870607 (11.670334358779868, 7)
loss 372.4788513183594
Current State,action,reward,Response time,Next State:  (7, 11.670334358779868) 3 22.0 1103.08623692 (11.819721938468785, 8)
loss 369.1406555175781
Current State,action,reward,Response time,Next State:  (8, 11.819721938468785) 3 21.0 1061.84470565 (12.19918626616789, 9)
loss 379.1755065917969
Current State,action,reward,Response time,Next State:  (9, 12.19918626616789) 3 20.0 1061.68473805 (12.501496275411796, 10)
loss 376.5104064941406
Current State,action,reward,Response time,Next State:  (10, 12.501496275411796) 1 21.0 1040.79092857 (13.168618569876575, 9)
loss 374.0145568847656
Current State,action,reward,Response time,Next State:  (9, 13.168618569876575) 3 20.0 1112.429735 (13.649658108197247, 10)
loss 374.2501525878906
Current State,action,reward,Response time,Next State:  (10, 13.649658108197247) 3 19.0 1101.69413046 (14.283719188889453, 11)
loss 374.255126953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 370.91156005859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 371.4710693359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 373.3027648925781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 372.4160461425781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 374.962890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 369.707763671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 374.7608337402344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 373.58380126953125
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 2 19.0 1316.32685758 (18.385807405229915, 11)
loss 370.273193359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 370.0057067871094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 370.0498046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 371.87652587890625
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 0 21.0 1387.23260634 (19.340464848017284, 9)
loss 371.7405700683594
Current State,action,reward,Response time,Next State:  (9, 19.340464848017284) 3 20.0 1435.4954296 (19.213467265587269, 10)
loss 376.1454162597656
Current State,action,reward,Response time,Next State:  (10, 19.213467265587269) 3 19.0 1396.82133527 (19.140765783401285, 11)
loss 370.2862243652344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 376.205810546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 369.452880859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 369.6182556152344
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 0 21.0 1385.62570908 (19.08360399753829, 9)
loss 373.4743957519531
Current State,action,reward,Response time,Next State:  (9, 19.08360399753829) 3 20.0 1422.05003169 (18.668181536495972, 10)
loss 374.77081298828125
Current State,action,reward,Response time,Next State:  (10, 18.668181536495972) 3 19.0 1367.89714889 (18.375894992990247, 11)
loss 375.1064147949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 368.8689270019531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 375.2674865722656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 374.518310546875
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 0 21.0 1258.07554888 (16.237094554670044, 9)
loss 377.9234619140625
Current State,action,reward,Response time,Next State:  (9, 16.237094554670044) 2 21.0 1273.04930988 (15.950694610794756, 9)
loss 369.8973083496094
Current State,action,reward,Response time,Next State:  (9, 15.950694610794756) 3 20.0 1258.0576862 (15.828704162850809, 10)
loss 373.1936950683594
Action +2 not possible so Scaled up by 1
Current State,action,reward,Response time,Next State:  (10, 15.828704162850809) 4 19.0 1217.27964986 (15.550833128512703, 11)
loss 370.2138366699219
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 0 21.0 1189.84140354 (15.446694946204717, 9)
loss 374.21588134765625
Current State,action,reward,Response time,Next State:  (9, 15.446694946204717) 3 20.0 1231.67579099 (15.750501603468638, 10)
loss 374.855224609375
Current State,action,reward,Response time,Next State:  (10, 15.750501603468638) 3 19.0 1213.1314661 (15.817158911312735, 11)
loss 367.69317626953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 370.7684326171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 370.9444274902344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 375.22052001953125
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 0 21.0 1211.18755114 (16.004586266677634, 9)
loss 373.713134765625
Current State,action,reward,Response time,Next State:  (9, 16.004586266677634) 3 20.0 1260.87864843 (16.017694914042416, 10)
loss 379.3740234375
Current State,action,reward,Response time,Next State:  (10, 16.017694914042416) 3 19.0 1227.30449265 (15.947547279389703, 11)
loss 371.4175109863281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 371.07928466796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 373.1800842285156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 377.82037353515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 371.8905944824219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 372.02593994140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 370.45587158203125
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 2 19.0 1257.77263112 (16.845818065953559, 11)
loss 372.4759826660156
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 0 21.0 1258.27117243 (17.052961248403161, 9)
loss 371.9818115234375
Current State,action,reward,Response time,Next State:  (9, 17.052961248403161) 3 20.0 1315.75590499 (17.215992726625572, 10)
loss 377.25341796875
############ Running episode number: 106  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 2 20.0 1029.06659875 (11.973514343585284, 10)
loss 373.8682556152344
Current State,action,reward,Response time,Next State:  (10, 11.973514343585284) 0 22.0 1012.7846064 (11.786394321941378, 8)
loss 372.5726318359375
Current State,action,reward,Response time,Next State:  (8, 11.786394321941378) 3 21.0 1059.89687994 (11.61852219546234, 9)
loss 372.5911865234375
Current State,action,reward,Response time,Next State:  (9, 11.61852219546234) 3 20.0 1031.28983953 (11.469111876584304, 10)
loss 374.70355224609375
Current State,action,reward,Response time,Next State:  (10, 11.469111876584304) 2 20.0 986.02903554 (11.336751742492702, 10)
loss 372.9093933105469
Current State,action,reward,Response time,Next State:  (10, 11.336751742492702) 3 19.0 979.00811241 (11.25610796929319, 11)
loss 369.578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 371.5574645996094
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 4 19.0 950.798097136 (10.995673623987257, 11)
loss 371.30596923828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 374.1492614746094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 370.6954650878906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 376.32550048828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 368.3536071777344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 369.05767822265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 377.4917907714844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 378.55340576171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 365.9790344238281
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 2 19.0 925.738299342 (10.553846649940214, 11)
loss 370.32489013671875
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 0 21.0 925.789969445 (10.489125480251131, 9)
loss 371.83026123046875
Current State,action,reward,Response time,Next State:  (9, 10.489125480251131) 3 20.0 972.171495057 (10.448897752470936, 10)
loss 376.0162353515625
Current State,action,reward,Response time,Next State:  (10, 10.448897752470936) 3 19.0 931.912703681 (10.433149880183072, 11)
loss 381.2344665527344
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 2 19.0 919.412094444 (10.44185150623065, 11)
loss 378.7470703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 371.5699462890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 375.7452087402344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 374.4385681152344
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 0 21.0 917.051082408 (10.344006106602812, 9)
loss 374.2426452636719
Current State,action,reward,Response time,Next State:  (9, 10.344006106602812) 3 20.0 964.575212011 (10.319026962956018, 10)
loss 372.35418701171875
Current State,action,reward,Response time,Next State:  (10, 10.319026962956018) 3 19.0 925.023825574 (10.30224719189987, 11)
loss 373.572265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 370.2644348144531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 374.9805908203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 375.06103515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 374.3509216308594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 380.3396911621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 372.838623046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 373.20855712890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 370.4617004394531
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 2 19.0 909.008683798 (10.369891240151098, 11)
loss 373.9897766113281
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 1 20.0 916.069372847 (10.316955310454549, 10)
loss 373.7860412597656
Current State,action,reward,Response time,Next State:  (10, 10.316955310454549) 2 20.0 924.913936648 (10.333617326102203, 10)
loss 378.560791015625
Current State,action,reward,Response time,Next State:  (10, 10.333617326102203) 1 21.0 925.797758139 (10.390165524255663, 9)
loss 365.8813781738281
Current State,action,reward,Response time,Next State:  (9, 10.390165524255663) 3 20.0 966.991429728 (10.425974763084863, 10)
loss 381.6703796386719
Current State,action,reward,Response time,Next State:  (10, 10.425974763084863) 3 19.0 930.696774523 (10.546025383098053, 11)
loss 370.2670593261719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 374.17083740234375
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 1 20.0 931.154858096 (10.624473674922116, 10)
loss 374.856689453125
Current State,action,reward,Response time,Next State:  (10, 10.624473674922116) 1 21.0 941.225969064 (10.771376986314287, 9)
loss 370.0913391113281
Current State,action,reward,Response time,Next State:  (9, 10.771376986314287) 2 21.0 986.945968488 (10.924797168745895, 9)
loss 374.22607421875
Current State,action,reward,Response time,Next State:  (9, 10.924797168745895) 3 20.0 994.97675791 (11.039747673816453, 10)
loss 377.5412902832031
Current State,action,reward,Response time,Next State:  (10, 11.039747673816453) 0 22.0 963.253801267 (11.271571944085663, 8)
loss 369.72650146484375
Current State,action,reward,Response time,Next State:  (8, 11.271571944085663) 3 21.0 1029.8081916 (11.670334358779868, 9)
loss 379.2443542480469
Current State,action,reward,Response time,Next State:  (9, 11.670334358779868) 3 20.0 1034.00195058 (11.819721938468785, 10)
loss 374.881591796875
Current State,action,reward,Response time,Next State:  (10, 11.819721938468785) 3 19.0 1004.62682792 (12.19918626616789, 11)
loss 373.5334777832031
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 2 19.0 1012.73322757 (12.501496275411796, 11)
loss 376.3672180175781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 375.318603515625
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 2 19.0 1063.96010023 (13.649658108197247, 11)
loss 372.9652404785156
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 1 20.0 1089.37925646 (14.283719188889453, 10)
loss 372.66937255859375
Current State,action,reward,Response time,Next State:  (10, 14.283719188889453) 3 19.0 1135.32732476 (14.677479537099185, 11)
loss 376.9804382324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 369.2865905761719
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 1 20.0 1179.43847566 (15.836943704090487, 10)
loss 370.953369140625
Current State,action,reward,Response time,Next State:  (10, 15.836943704090487) 3 19.0 1217.71670884 (16.466876895473597, 11)
loss 377.8089599609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 370.4759521484375
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 2 19.0 1259.63387034 (17.534967586021782, 11)
loss 377.7680969238281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 371.9474182128906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 377.64068603515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 371.8343811035156
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 4 19.0 1339.64749699 (18.671267839956315, 11)
loss 379.0386657714844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 375.36309814453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 371.8127746582031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 373.69512939453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 370.63739013671875
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 4 19.0 1383.38281107 (19.140765783401285, 11)
loss 375.9369201660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 372.51446533203125
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 1 20.0 1392.48057747 (19.223969507401588, 10)
loss 374.8396301269531
Current State,action,reward,Response time,Next State:  (10, 19.223969507401588) 1 21.0 1397.37841716 (19.25591252280865, 9)
loss 374.6689758300781
Current State,action,reward,Response time,Next State:  (9, 19.25591252280865) 3 20.0 1431.06953264 (19.08360399753829, 10)
loss 375.7127990722656
Current State,action,reward,Response time,Next State:  (10, 19.08360399753829) 3 19.0 1389.93285614 (18.668181536495972, 11)
loss 373.3334045410156
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 0 21.0 1354.56874896 (18.375894992990247, 9)
loss 368.8509826660156
Current State,action,reward,Response time,Next State:  (9, 18.375894992990247) 3 20.0 1385.00495784 (17.82724819986867, 10)
loss 372.7962951660156
Current State,action,reward,Response time,Next State:  (10, 17.82724819986867) 3 19.0 1323.29060362 (17.229782241685768, 11)
loss 373.3432312011719
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 4 19.0 1278.56065924 (16.84211602880065, 11)
loss 375.84075927734375
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 0 21.0 1258.07554888 (16.237094554670044, 9)
loss 376.0031433105469
Current State,action,reward,Response time,Next State:  (9, 16.237094554670044) 3 20.0 1273.04930988 (15.950694610794756, 10)
loss 374.7481689453125
Current State,action,reward,Response time,Next State:  (10, 15.950694610794756) 3 19.0 1223.7505224 (15.828704162850809, 11)
loss 373.86505126953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 379.90191650390625
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 1 20.0 1189.84140354 (15.446694946204717, 10)
loss 368.34979248046875
Current State,action,reward,Response time,Next State:  (10, 15.446694946204717) 3 19.0 1197.01631782 (15.750501603468638, 11)
loss 375.9625244140625
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 2 19.0 1200.39231205 (15.817158911312735, 11)
loss 373.1461486816406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 375.3998107910156
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 2 19.0 1204.59090422 (15.892373986997768, 11)
loss 378.0820617675781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 375.1047668457031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 375.1163024902344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 368.7267761230469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 369.81719970703125
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 1 20.0 1210.80462626 (16.11465619633363, 10)
loss 376.58203125
Current State,action,reward,Response time,Next State:  (10, 16.11465619633363) 3 19.0 1232.44771583 (16.147078378791146, 11)
loss 377.7328186035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 372.6422424316406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 373.6719970703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 378.15667724609375
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 4 19.0 1248.87152463 (16.836383524612351, 11)
loss 377.748291015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 372.2756042480469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 367.4841613769531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 369.7055969238281
############ Running episode number: 107  ##############
Action +2 not possible so Scaled up by 1
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 4 19.0 1029.06659875 (11.973514343585284, 11)
loss 372.9583740234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 378.0008544921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 375.5821533203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 374.93170166015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 375.4832458496094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 377.19189453125
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 0 21.0 962.898956888 (11.027107764209074, 9)
loss 379.10662841796875
Current State,action,reward,Response time,Next State:  (9, 11.027107764209074) 3 20.0 1000.33221268 (10.995673623987257, 10)
loss 377.76678466796875
Current State,action,reward,Response time,Next State:  (10, 10.995673623987257) 3 19.0 960.915933313 (10.931193889570471, 11)
loss 375.933349609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 371.8250427246094
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 4 19.0 939.691239608 (10.819208572963639, 11)
loss 372.8367614746094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 373.5007629394531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 372.5224304199219
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 4 19.0 937.318160694 (10.644925616761762, 11)
loss 367.66064453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 373.8296813964844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 370.12353515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 374.8283996582031
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 4 19.0 925.789969445 (10.489125480251131, 11)
loss 373.9688720703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 378.76904296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 370.00390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 372.6111755371094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 369.2919006347656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 373.3153076171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 370.759521484375
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 0 21.0 917.051082408 (10.344006106602812, 9)
loss 369.1055908203125
Current State,action,reward,Response time,Next State:  (9, 10.344006106602812) 3 20.0 964.575212011 (10.319026962956018, 10)
loss 377.24957275390625
Current State,action,reward,Response time,Next State:  (10, 10.319026962956018) 3 19.0 925.023825574 (10.30224719189987, 11)
loss 375.8810119628906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 369.72747802734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 372.58111572265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 376.2756652832031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 371.702880859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 375.2376403808594
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 4 19.0 909.642131904 (10.276491935146446, 11)
loss 378.3891296386719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 374.16412353515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 377.6983337402344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 375.62078857421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 376.9994201660156
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 0 21.0 913.272125304 (10.333617326102203, 9)
loss 371.6743469238281
Current State,action,reward,Response time,Next State:  (9, 10.333617326102203) 3 20.0 964.03141062 (10.390165524255663, 10)
loss 372.1739196777344
Current State,action,reward,Response time,Next State:  (10, 10.390165524255663) 3 19.0 928.797305964 (10.425974763084863, 11)
loss 379.54034423828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 374.41656494140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 372.2904052734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 380.329345703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 370.06396484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 372.0196228027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 370.64990234375
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 1 20.0 951.466016946 (11.271571944085663, 10)
loss 371.4768371582031
Current State,action,reward,Response time,Next State:  (10, 11.271571944085663) 3 19.0 975.550709187 (11.670334358779868, 11)
loss 372.4887390136719
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 1 20.0 984.787563682 (11.819721938468785, 10)
loss 377.1544189453125
Current State,action,reward,Response time,Next State:  (10, 11.819721938468785) 1 21.0 1004.62682792 (12.19918626616789, 9)
loss 378.4486083984375
Current State,action,reward,Response time,Next State:  (9, 12.19918626616789) 0 23.0 1061.68473805 (12.501496275411796, 7)
loss 374.3706359863281
Current State,action,reward,Response time,Next State:  (7, 12.501496275411796) 3 22.0 1154.88153049 (13.168618569876575, 8)
loss 373.7442626953125
Current State,action,reward,Response time,Next State:  (8, 13.168618569876575) 4 20.0 1140.68069275 (13.649658108197247, 10)
loss 379.5272216796875
Current State,action,reward,Response time,Next State:  (10, 13.649658108197247) 3 19.0 1101.69413046 (14.283719188889453, 11)
loss 373.52374267578125
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 1 20.0 1122.88439768 (14.677479537099185, 10)
loss 371.54254150390625
Current State,action,reward,Response time,Next State:  (10, 14.677479537099185) 1 21.0 1156.21398489 (15.353965082180355, 9)
loss 375.46087646484375
Current State,action,reward,Response time,Next State:  (9, 15.353965082180355) 3 20.0 1226.82184023 (15.836943704090487, 10)
loss 374.5889587402344
Current State,action,reward,Response time,Next State:  (10, 15.836943704090487) 3 19.0 1217.71670884 (16.466876895473597, 11)
loss 370.7599182128906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 372.8052062988281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 381.218505859375
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 4 19.0 1294.6873044 (17.669285735563751, 11)
loss 377.8554382324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 368.8253173828125
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 2 19.0 1316.32685758 (18.385807405229915, 11)
loss 374.82403564453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 373.68890380859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 376.1709899902344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 373.55322265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 378.8185729980469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 372.84893798828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 377.57696533203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 375.92547607421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 368.01824951171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 374.88592529296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 373.5472106933594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 375.2417907714844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 378.8149719238281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 375.09722900390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 375.5765075683594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 381.4799499511719
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 0 21.0 1258.07554888 (16.237094554670044, 9)
loss 376.1181335449219
Current State,action,reward,Response time,Next State:  (9, 16.237094554670044) 3 20.0 1273.04930988 (15.950694610794756, 10)
loss 375.3077087402344
Current State,action,reward,Response time,Next State:  (10, 15.950694610794756) 3 19.0 1223.7505224 (15.828704162850809, 11)
loss 376.9361572265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 377.24017333984375
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 1 20.0 1189.84140354 (15.446694946204717, 10)
loss 373.6325378417969
Current State,action,reward,Response time,Next State:  (10, 15.446694946204717) 0 22.0 1197.01631782 (15.750501603468638, 8)
loss 370.9049377441406
Current State,action,reward,Response time,Next State:  (8, 15.750501603468638) 2 22.0 1291.57831736 (15.817158911312735, 8)
loss 373.0360412597656
Current State,action,reward,Response time,Next State:  (8, 15.817158911312735) 3 21.0 1295.47409005 (15.829956988360925, 9)
loss 372.0956115722656
Current State,action,reward,Response time,Next State:  (9, 15.829956988360925) 3 20.0 1251.7376675 (15.892373986997768, 10)
loss 371.5525817871094
Current State,action,reward,Response time,Next State:  (10, 15.892373986997768) 0 22.0 1220.65695786 (15.954793861767499, 8)
loss 380.51129150390625
Current State,action,reward,Response time,Next State:  (8, 15.954793861767499) 3 21.0 1303.51813652 (16.004586266677634, 9)
loss 375.0843505859375
Current State,action,reward,Response time,Next State:  (9, 16.004586266677634) 3 20.0 1260.87864843 (16.017694914042416, 10)
loss 370.43878173828125
Current State,action,reward,Response time,Next State:  (10, 16.017694914042416) 3 19.0 1227.30449265 (15.947547279389703, 11)
loss 376.10919189453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 374.07830810546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 370.8248291015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 370.16412353515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 372.6427001953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 375.3896179199219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 376.68634033203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 372.221923828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 374.3934326171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 373.1554260253906
############ Running episode number: 108  ##############
Action +2 not possible so Scaled up by 1
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 4 19.0 1029.06659875 (11.973514343585284, 11)
loss 376.320068359375
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 0 21.0 1000.80824137 (11.786394321941378, 9)
loss 377.8145446777344
Current State,action,reward,Response time,Next State:  (9, 11.786394321941378) 3 20.0 1040.0771169 (11.61852219546234, 10)
loss 373.4571228027344
Current State,action,reward,Response time,Next State:  (10, 11.61852219546234) 0 22.0 993.95437024 (11.469111876584304, 8)
loss 377.6785888671875
Current State,action,reward,Response time,Next State:  (8, 11.469111876584304) 4 20.0 1041.35337246 (11.336751742492702, 10)
loss 375.26324462890625
Current State,action,reward,Response time,Next State:  (10, 11.336751742492702) 3 19.0 979.00811241 (11.25610796929319, 11)
loss 376.62725830078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 369.70635986328125
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 0 21.0 950.798097136 (10.995673623987257, 9)
loss 372.2894287109375
Current State,action,reward,Response time,Next State:  (9, 10.995673623987257) 3 20.0 998.686790566 (10.931193889570471, 10)
loss 375.24310302734375
Current State,action,reward,Response time,Next State:  (10, 10.931193889570471) 3 19.0 957.495664348 (10.816918347608043, 11)
loss 373.8877258300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 375.0656433105469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 373.1913146972656
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 2 19.0 937.12351295 (10.772009508959538, 11)
loss 369.33380126953125
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 0 21.0 937.318160694 (10.644925616761762, 9)
loss 380.43939208984375
Current State,action,reward,Response time,Next State:  (9, 10.644925616761762) 3 20.0 980.32686333 (10.58735855349979, 10)
loss 374.260009765625
Current State,action,reward,Response time,Next State:  (10, 10.58735855349979) 3 19.0 939.257231149 (10.552868829802469, 11)
loss 372.3927917480469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 370.86614990234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 369.1188659667969
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 4 19.0 922.369964659 (10.448897752470936, 11)
loss 370.7695617675781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 372.4906005859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 367.9403991699219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 370.490234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 373.796142578125
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 2 19.0 919.104778396 (10.388469398680568, 11)
loss 371.1560974121094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 373.5955505371094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 375.39312744140625
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 2 19.0 913.381595845 (10.30224719189987, 11)
loss 375.2043762207031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 372.7673645019531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 378.89825439453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 377.4638671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 380.7874450683594
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 2 19.0 912.67468196 (10.24826025489064, 11)
loss 372.6448974609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 376.5274353027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 369.55865478515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 377.4210205078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 374.2922058105469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 372.03173828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 374.35516357421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 370.37774658203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 372.9137878417969
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 4 19.0 919.032945938 (10.546025383098053, 11)
loss 371.6346435546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 369.7633056640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 373.0902099609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 374.0844421386719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 373.2666931152344
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 4 19.0 945.391786838 (11.039747673816453, 11)
loss 371.4137878417969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 378.0311279296875
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 2 19.0 963.716106332 (11.670334358779868, 11)
loss 375.67431640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 375.34405517578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 370.4134826660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 367.98199462890625
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 4 19.0 1028.70793389 (13.168618569876575, 11)
loss 373.90155029296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 369.4615478515625
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 0 21.0 1089.37925646 (14.283719188889453, 9)
loss 377.485595703125
Current State,action,reward,Response time,Next State:  (9, 14.283719188889453) 3 20.0 1170.79974938 (14.677479537099185, 10)
loss 377.3218994140625
Current State,action,reward,Response time,Next State:  (10, 14.677479537099185) 0 22.0 1156.21398489 (15.353965082180355, 8)
loss 374.27032470703125
Current State,action,reward,Response time,Next State:  (8, 15.353965082180355) 3 21.0 1268.40282167 (15.836943704090487, 9)
loss 372.3555603027344
Current State,action,reward,Response time,Next State:  (9, 15.836943704090487) 4 19.0 1252.10338759 (16.466876895473597, 11)
loss 374.9659729003906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 375.9562072753906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 372.2336730957031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 374.3795471191406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 372.760498046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 371.35272216796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 381.30828857421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 373.6427307128906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 373.6155090332031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 377.4442443847656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 383.214599609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 372.8376159667969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 370.70068359375
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 2 19.0 1392.48057747 (19.223969507401588, 11)
loss 377.4686279296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 372.68280029296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 366.6020812988281
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 4 19.0 1376.52055872 (18.668181536495972, 11)
loss 371.3004455566406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 376.0486145019531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 375.5555419921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 374.8483581542969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 375.1189880371094
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 0 21.0 1258.07554888 (16.237094554670044, 9)
loss 369.4578857421875
Current State,action,reward,Response time,Next State:  (9, 16.237094554670044) 3 20.0 1273.04930988 (15.950694610794756, 10)
loss 372.9334411621094
Current State,action,reward,Response time,Next State:  (10, 15.950694610794756) 2 20.0 1223.7505224 (15.828704162850809, 10)
loss 378.1971130371094
Current State,action,reward,Response time,Next State:  (10, 15.828704162850809) 2 20.0 1217.27964986 (15.550833128512703, 10)
loss 374.6542053222656
Current State,action,reward,Response time,Next State:  (10, 15.550833128512703) 3 19.0 1202.54023315 (15.446694946204717, 11)
loss 371.205078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 375.9140930175781
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 0 21.0 1200.39231205 (15.817158911312735, 9)
loss 372.8965759277344
Current State,action,reward,Response time,Next State:  (9, 15.817158911312735) 3 20.0 1251.06775133 (15.829956988360925, 10)
loss 379.4084777832031
Current State,action,reward,Response time,Next State:  (10, 15.829956988360925) 0 22.0 1217.34610485 (15.892373986997768, 8)
loss 370.359619140625
Current State,action,reward,Response time,Next State:  (8, 15.892373986997768) 3 21.0 1299.87001973 (15.954793861767499, 9)
loss 373.322998046875
Current State,action,reward,Response time,Next State:  (9, 15.954793861767499) 3 20.0 1258.27226176 (16.004586266677634, 10)
loss 376.0511169433594
Current State,action,reward,Response time,Next State:  (10, 16.004586266677634) 3 19.0 1226.60915635 (16.017694914042416, 11)
loss 371.1662292480469
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 4 19.0 1214.51137704 (15.947547279389703, 11)
loss 373.69482421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 377.0848388671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 368.7349853515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 374.24896240234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 373.9390563964844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 369.2624206542969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 374.9297180175781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 378.19122314453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 377.6319885253906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 371.3080749511719
############ Running episode number: 109  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 370.7463073730469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 376.2183837890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 376.3949890136719
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 0 21.0 982.049698353 (11.469111876584304, 9)
loss 375.4091796875
Current State,action,reward,Response time,Next State:  (9, 11.469111876584304) 3 20.0 1023.46894667 (11.336751742492702, 10)
loss 373.5650329589844
Action +2 not possible so Scaled up by 1
Current State,action,reward,Response time,Next State:  (10, 11.336751742492702) 4 19.0 979.00811241 (11.25610796929319, 11)
loss 370.3965759277344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 379.2657165527344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 377.1517028808594
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 2 19.0 949.137050055 (10.931193889570471, 11)
loss 375.3123779296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 378.7845458984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 371.6289978027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 373.30841064453125
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 0 21.0 937.12351295 (10.772009508959538, 9)
loss 377.1043395996094
Current State,action,reward,Response time,Next State:  (9, 10.772009508959538) 3 20.0 986.979077927 (10.644925616761762, 10)
loss 374.8534240722656
Current State,action,reward,Response time,Next State:  (10, 10.644925616761762) 3 19.0 942.310823749 (10.58735855349979, 11)
loss 372.2609558105469
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 1 20.0 927.560809977 (10.552868829802469, 10)
loss 371.9155578613281
Current State,action,reward,Response time,Next State:  (10, 10.552868829802469) 3 19.0 937.427755072 (10.553846649940214, 11)
loss 375.3583068847656
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 2 19.0 925.789969445 (10.489125480251131, 11)
loss 374.25164794921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 376.781494140625
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 4 19.0 920.244245637 (10.433149880183072, 11)
loss 373.4109802246094
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 0 21.0 919.412094444 (10.44185150623065, 9)
loss 373.0618896484375
Current State,action,reward,Response time,Next State:  (9, 10.44185150623065) 3 20.0 969.696935814 (10.370942817486826, 10)
loss 376.3612365722656
Current State,action,reward,Response time,Next State:  (10, 10.370942817486826) 3 19.0 927.777654938 (10.42733414151318, 11)
loss 376.56512451171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 364.200439453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 377.8409423828125
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 4 19.0 914.701547126 (10.319026962956018, 11)
loss 374.4815979003906
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 2 19.0 913.381595845 (10.30224719189987, 11)
loss 377.67498779296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 371.0182189941406
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 2 19.0 911.223233653 (10.268274366284802, 11)
loss 374.8330993652344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 374.29168701171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 372.9473571777344
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 1 20.0 912.67468196 (10.24826025489064, 10)
loss 377.3985900878906
Current State,action,reward,Response time,Next State:  (10, 10.24826025489064) 3 19.0 921.2700698 (10.276491935146446, 11)
loss 376.87603759765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 368.9868469238281
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 2 19.0 909.046654676 (10.236272697871373, 11)
loss 374.7841796875
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 0 21.0 909.008683798 (10.369891240151098, 9)
loss 375.9482421875
Current State,action,reward,Response time,Next State:  (9, 10.369891240151098) 3 20.0 965.930171009 (10.316955310454549, 10)
loss 372.60693359375
Current State,action,reward,Response time,Next State:  (10, 10.316955310454549) 3 19.0 924.913936648 (10.333617326102203, 11)
loss 372.1418151855469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 374.6294250488281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 371.0906066894531
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 1 20.0 919.032945938 (10.546025383098053, 10)
loss 375.3853759765625
Current State,action,reward,Response time,Next State:  (10, 10.546025383098053) 3 19.0 937.064750655 (10.655373370049301, 11)
loss 375.1886291503906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 369.63592529296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 375.58465576171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 371.92144775390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 377.1419677734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 377.7797546386719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 376.54791259765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 376.40679931640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 374.70196533203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 371.5643615722656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 372.93206787109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 375.5518493652344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 378.5008239746094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 375.5354919433594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 368.7840270996094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 376.8324279785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 379.98583984375
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 2 19.0 1238.24711194 (16.871606159345866, 11)
loss 371.6019592285156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 375.8964538574219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 371.2572021484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 382.0396423339844
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 1 20.0 1316.32685758 (18.385807405229915, 10)
loss 375.63616943359375
Current State,action,reward,Response time,Next State:  (10, 18.385807405229915) 2 20.0 1352.9188695 (18.671267839956315, 10)
loss 374.8659973144531
Current State,action,reward,Response time,Next State:  (10, 18.671267839956315) 3 19.0 1368.06085906 (19.02839494033929, 11)
loss 374.57659912109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 374.84136962890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 372.8013916015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 371.2238464355469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 372.88922119140625
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 2 19.0 1379.54110953 (19.385636054792762, 11)
loss 379.5777282714844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 371.9927978515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 375.2096862792969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 371.36810302734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 376.6095275878906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 374.9980773925781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 375.7335205078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 372.16845703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 368.9677429199219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 375.76336669921875
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 2 19.0 1226.10492247 (15.950694610794756, 11)
loss 377.6124267578125
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 0 21.0 1210.97093797 (15.828704162850809, 9)
loss 374.53216552734375
Current State,action,reward,Response time,Next State:  (9, 15.828704162850809) 3 20.0 1251.67208827 (15.550833128512703, 10)
loss 370.33062744140625
Current State,action,reward,Response time,Next State:  (10, 15.550833128512703) 0 22.0 1202.54023315 (15.446694946204717, 8)
loss 375.30340576171875
Current State,action,reward,Response time,Next State:  (8, 15.446694946204717) 3 21.0 1273.82239956 (15.750501603468638, 9)
loss 376.2763977050781
Current State,action,reward,Response time,Next State:  (9, 15.750501603468638) 3 20.0 1247.57857022 (15.817158911312735, 10)
loss 373.62933349609375
Current State,action,reward,Response time,Next State:  (10, 15.817158911312735) 3 19.0 1216.66724247 (15.829956988360925, 11)
loss 372.00909423828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 375.59332275390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 371.4622802734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 374.03131103515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 372.0537109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 371.2456359863281
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 4 19.0 1210.80462626 (16.11465619633363, 11)
loss 374.8056640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 374.4189147949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 374.3901062011719
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 2 19.0 1225.69057988 (16.295120821876548, 11)
loss 370.9801025390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 377.8583068847656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 375.134765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 373.049072265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 370.2325439453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 376.13555908203125
############ Running episode number: 110  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 372.4247741699219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 374.0320739746094
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 0 21.0 990.920419923 (11.61852219546234, 9)
loss 375.05914306640625
Current State,action,reward,Response time,Next State:  (9, 11.61852219546234) 3 20.0 1031.28983953 (11.469111876584304, 10)
loss 380.7598876953125
Current State,action,reward,Response time,Next State:  (10, 11.469111876584304) 3 19.0 986.02903554 (11.336751742492702, 11)
loss 374.84716796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 373.5791931152344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 374.61822509765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 371.07891845703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 374.79644775390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 372.88543701171875
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 4 19.0 939.691239608 (10.819208572963639, 11)
loss 373.4820861816406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 369.99456787109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 371.4359436035156
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 0 21.0 937.318160694 (10.644925616761762, 9)
loss 370.47235107421875
Current State,action,reward,Response time,Next State:  (9, 10.644925616761762) 1 22.0 980.32686333 (10.58735855349979, 8)
loss 375.4012756347656
Current State,action,reward,Response time,Next State:  (8, 10.58735855349979) 3 21.0 989.819480251 (10.552868829802469, 9)
loss 367.01019287109375
Current State,action,reward,Response time,Next State:  (9, 10.552868829802469) 3 20.0 975.508144832 (10.553846649940214, 10)
loss 374.49566650390625
Current State,action,reward,Response time,Next State:  (10, 10.553846649940214) 3 19.0 937.479622653 (10.489125480251131, 11)
loss 370.366455078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 369.0200500488281
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 2 19.0 920.244245637 (10.433149880183072, 11)
loss 376.594482421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 375.27801513671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 372.5478515625
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 4 19.0 916.124940439 (10.42733414151318, 11)
loss 373.0026550292969
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 0 21.0 919.104778396 (10.388469398680568, 9)
loss 374.7630310058594
Current State,action,reward,Response time,Next State:  (9, 10.388469398680568) 3 20.0 966.902645924 (10.344006106602812, 10)
loss 370.3880310058594
Current State,action,reward,Response time,Next State:  (10, 10.344006106602812) 3 19.0 926.348821567 (10.319026962956018, 11)
loss 377.5654602050781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 377.4028015136719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 376.1141357421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 375.2138671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 368.431640625
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 1 20.0 914.247384359 (10.305649118067803, 10)
loss 377.27996826171875
Current State,action,reward,Response time,Next State:  (10, 10.305649118067803) 2 20.0 924.314209939 (10.24826025489064, 10)
loss 374.65521240234375
Current State,action,reward,Response time,Next State:  (10, 10.24826025489064) 3 19.0 921.2700698 (10.276491935146446, 11)
loss 378.4359130859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 373.5106506347656
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 2 19.0 909.046654676 (10.236272697871373, 11)
loss 374.281982421875
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 0 21.0 909.008683798 (10.369891240151098, 9)
loss 378.52740478515625
Current State,action,reward,Response time,Next State:  (9, 10.369891240151098) 3 20.0 965.930171009 (10.316955310454549, 10)
loss 370.10198974609375
Current State,action,reward,Response time,Next State:  (10, 10.316955310454549) 3 19.0 924.913936648 (10.333617326102203, 11)
loss 374.2418212890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 378.27703857421875
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 2 19.0 917.140709305 (10.425974763084863, 11)
loss 374.69744873046875
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 4 19.0 919.032945938 (10.546025383098053, 11)
loss 371.4610290527344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 371.1644592285156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 373.5040283203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 371.624267578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 370.9397277832031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 369.3904113769531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 372.9064636230469
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 2 19.0 963.716106332 (11.670334358779868, 11)
loss 375.4222717285156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 371.86407470703125
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 0 21.0 992.681522335 (12.19918626616789, 9)
loss 366.2383728027344
Current State,action,reward,Response time,Next State:  (9, 12.19918626616789) 1 22.0 1061.68473805 (12.501496275411796, 8)
loss 374.46826171875
Current State,action,reward,Response time,Next State:  (8, 12.501496275411796) 3 21.0 1101.69086701 (13.168618569876575, 9)
loss 373.0372009277344
Current State,action,reward,Response time,Next State:  (9, 13.168618569876575) 1 22.0 1112.429735 (13.649658108197247, 8)
loss 372.01556396484375
Current State,action,reward,Response time,Next State:  (8, 13.649658108197247) 3 21.0 1168.79494995 (14.283719188889453, 9)
loss 374.2696533203125
Current State,action,reward,Response time,Next State:  (9, 14.283719188889453) 3 20.0 1170.79974938 (14.677479537099185, 10)
loss 374.6785888671875
Current State,action,reward,Response time,Next State:  (10, 14.677479537099185) 3 19.0 1156.21398489 (15.353965082180355, 11)
loss 374.8056335449219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 377.03265380859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 374.9821472167969
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 1 20.0 1238.24711194 (16.871606159345866, 10)
loss 371.3619689941406
Current State,action,reward,Response time,Next State:  (10, 16.871606159345866) 1 21.0 1272.5994393 (17.534967586021782, 9)
loss 377.9269104003906
Current State,action,reward,Response time,Next State:  (9, 17.534967586021782) 3 20.0 1340.98655806 (17.669285735563751, 10)
loss 375.35284423828125
Current State,action,reward,Response time,Next State:  (10, 17.669285735563751) 0 22.0 1314.91162813 (17.944480812078613, 8)
loss 375.8440856933594
Current State,action,reward,Response time,Next State:  (8, 17.944480812078613) 2 22.0 1419.80498244 (18.385807405229915, 8)
loss 372.59722900390625
Current State,action,reward,Response time,Next State:  (8, 18.385807405229915) 3 21.0 1445.59822471 (18.671267839956315, 9)
loss 373.0555725097656
Current State,action,reward,Response time,Next State:  (9, 18.671267839956315) 3 20.0 1400.46626871 (19.02839494033929, 10)
loss 370.61285400390625
Current State,action,reward,Response time,Next State:  (10, 19.02839494033929) 3 19.0 1387.00434183 (19.286321916040979, 11)
loss 378.90972900390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 378.572021484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 377.45709228515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 377.7699279785156
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 4 19.0 1379.54110953 (19.385636054792762, 11)
loss 372.3287658691406
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 1 20.0 1392.48057747 (19.223969507401588, 10)
loss 374.2854309082031
Action +2 not possible so Scaled up by 1
Current State,action,reward,Response time,Next State:  (10, 19.223969507401588) 4 19.0 1397.37841716 (19.25591252280865, 11)
loss 380.9617919921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 374.68450927734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 381.223388671875
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 4 19.0 1354.56874896 (18.375894992990247, 11)
loss 383.3421936035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 369.7710266113281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 372.8087158203125
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 2 19.0 1278.56065924 (16.84211602880065, 11)
loss 373.3036804199219
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 2 19.0 1258.07554888 (16.237094554670044, 11)
loss 377.18695068359375
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 4 19.0 1226.10492247 (15.950694610794756, 11)
loss 375.63751220703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 374.6474609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 372.93438720703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 373.70562744140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 372.4167785644531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 371.5740966796875
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 1 20.0 1203.91462651 (15.829956988360925, 10)
loss 377.4863586425781
Current State,action,reward,Response time,Next State:  (10, 15.829956988360925) 3 19.0 1217.34610485 (15.892373986997768, 11)
loss 370.6527404785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 374.0423278808594
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 1 20.0 1211.18755114 (16.004586266677634, 10)
loss 375.53948974609375
Current State,action,reward,Response time,Next State:  (10, 16.004586266677634) 3 19.0 1226.60915635 (16.017694914042416, 11)
loss 377.3740234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 370.79486083984375
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 0 21.0 1210.80462626 (16.11465619633363, 9)
loss 378.8346252441406
Current State,action,reward,Response time,Next State:  (9, 16.11465619633363) 3 20.0 1266.64026605 (16.147078378791146, 10)
loss 375.29742431640625
Current State,action,reward,Response time,Next State:  (10, 16.147078378791146) 0 22.0 1234.16752106 (16.229253414601111, 8)
loss 375.4449768066406
Current State,action,reward,Response time,Next State:  (8, 16.229253414601111) 2 22.0 1319.55886882 (16.295120821876548, 8)
loss 380.5256652832031
Current State,action,reward,Response time,Next State:  (8, 16.295120821876548) 3 21.0 1323.40847593 (16.667936385136993, 9)
loss 369.88751220703125
Current State,action,reward,Response time,Next State:  (9, 16.667936385136993) 3 20.0 1295.6017535 (16.836383524612351, 10)
loss 373.20733642578125
Current State,action,reward,Response time,Next State:  (10, 16.836383524612351) 3 19.0 1270.73108663 (16.845818065953559, 11)
loss 378.62127685546875
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 0 21.0 1258.27117243 (17.052961248403161, 9)
loss 373.2846984863281
Current State,action,reward,Response time,Next State:  (9, 17.052961248403161) 2 21.0 1315.75590499 (17.215992726625572, 9)
loss 372.9043884277344
############ Running episode number: 111  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 0 22.0 1029.06659875 (11.973514343585284, 8)
loss 376.9697265625
Current State,action,reward,Response time,Next State:  (8, 11.973514343585284) 3 21.0 1070.83307124 (11.786394321941378, 9)
loss 371.0132141113281
Current State,action,reward,Response time,Next State:  (9, 11.786394321941378) 3 20.0 1040.0771169 (11.61852219546234, 10)
loss 371.9811706542969
Current State,action,reward,Response time,Next State:  (10, 11.61852219546234) 3 19.0 993.95437024 (11.469111876584304, 11)
loss 376.73590087890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 372.6718444824219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 372.8248291015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 377.38409423828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 379.92486572265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 374.1420593261719
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 2 19.0 945.729803224 (10.816918347608043, 11)
loss 374.4701843261719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 372.93218994140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 373.4842529296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 373.19659423828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 378.93011474609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 377.65966796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 374.3744201660156
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 1 20.0 925.738299342 (10.553846649940214, 10)
loss 376.62933349609375
Current State,action,reward,Response time,Next State:  (10, 10.553846649940214) 3 19.0 937.479622653 (10.489125480251131, 11)
loss 373.6141357421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 382.02532958984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 375.8603210449219
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 1 20.0 919.412094444 (10.44185150623065, 10)
loss 380.606689453125
Current State,action,reward,Response time,Next State:  (10, 10.44185150623065) 3 19.0 931.538941947 (10.370942817486826, 11)
loss 377.53131103515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 375.89306640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 376.16912841796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 378.0685119628906
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 2 19.0 914.701547126 (10.319026962956018, 11)
loss 373.3736572265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 377.9530944824219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 382.7225341796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 377.62347412109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 372.4692077636719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 375.56072998046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 377.33221435546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 375.4463806152344
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 4 19.0 911.133954163 (10.236991269871366, 11)
loss 378.2181701660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 376.20819091796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 371.7074890136719
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 4 19.0 916.069372847 (10.316955310454549, 11)
loss 377.3341064453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 376.420166015625
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 2 19.0 914.152581784 (10.390165524255663, 11)
loss 375.5946960449219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 375.5574645996094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 380.06390380859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 375.9502258300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 375.87884521484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 372.87646484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 370.8092041015625
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 0 21.0 945.391786838 (11.039747673816453, 9)
loss 375.2697448730469
Current State,action,reward,Response time,Next State:  (9, 11.039747673816453) 3 20.0 1000.99384957 (11.271571944085663, 10)
loss 376.8634338378906
Current State,action,reward,Response time,Next State:  (10, 11.271571944085663) 0 22.0 975.550709187 (11.670334358779868, 8)
loss 373.4034729003906
Current State,action,reward,Response time,Next State:  (8, 11.670334358779868) 3 21.0 1053.11377918 (11.819721938468785, 9)
loss 372.6487121582031
Current State,action,reward,Response time,Next State:  (9, 11.819721938468785) 3 20.0 1041.82165315 (12.19918626616789, 10)
loss 369.5371398925781
Current State,action,reward,Response time,Next State:  (10, 12.19918626616789) 0 22.0 1024.75516863 (12.501496275411796, 8)
loss 377.2537841796875
Current State,action,reward,Response time,Next State:  (8, 12.501496275411796) 3 21.0 1101.69086701 (13.168618569876575, 9)
loss 378.7106628417969
Current State,action,reward,Response time,Next State:  (9, 13.168618569876575) 4 19.0 1112.429735 (13.649658108197247, 11)
loss 374.5276184082031
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 1 20.0 1089.37925646 (14.283719188889453, 10)
loss 383.99603271484375
Current State,action,reward,Response time,Next State:  (10, 14.283719188889453) 3 19.0 1135.32732476 (14.677479537099185, 11)
loss 376.8240661621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 374.0537109375
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 0 21.0 1179.43847566 (15.836943704090487, 9)
loss 376.8937072753906
Current State,action,reward,Response time,Next State:  (9, 15.836943704090487) 3 20.0 1252.10338759 (16.466876895473597, 10)
loss 379.9308166503906
Current State,action,reward,Response time,Next State:  (10, 16.466876895473597) 3 19.0 1251.130943 (16.871606159345866, 11)
loss 373.0809020996094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 383.0528869628906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 382.4542236328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 376.2093200683594
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 4 19.0 1316.32685758 (18.385807405229915, 11)
loss 372.11474609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 369.9188537597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 378.2523498535156
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 0 21.0 1373.60319427 (19.286321916040979, 9)
loss 375.08343505859375
Current State,action,reward,Response time,Next State:  (9, 19.286321916040979) 0 23.0 1432.66131431 (19.340464848017284, 7)
loss 378.10040283203125
Current State,action,reward,Response time,Next State:  (7, 19.340464848017284) 2 23.0 1581.06369535 (19.213467265587269, 7)
loss 375.8340148925781
Current State,action,reward,Response time,Next State:  (7, 19.213467265587269) 3 22.0 1573.14962117 (19.140765783401285, 8)
loss 379.3929138183594
Current State,action,reward,Response time,Next State:  (8, 19.140765783401285) 2 22.0 1489.72161235 (19.385636054792762, 8)
loss 372.6283874511719
Current State,action,reward,Response time,Next State:  (8, 19.385636054792762) 3 21.0 1504.03300517 (19.223969507401588, 9)
loss 377.3387451171875
Current State,action,reward,Response time,Next State:  (9, 19.223969507401588) 3 20.0 1429.39747342 (19.25591252280865, 10)
loss 372.9293212890625
Current State,action,reward,Response time,Next State:  (10, 19.25591252280865) 3 19.0 1399.0728054 (19.08360399753829, 11)
loss 375.39202880859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 382.95880126953125
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 1 20.0 1354.56874896 (18.375894992990247, 10)
loss 372.8685607910156
Current State,action,reward,Response time,Next State:  (10, 18.375894992990247) 3 19.0 1352.39307459 (17.82724819986867, 11)
loss 378.1564636230469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 373.5647888183594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 374.6895751953125
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 4 19.0 1258.07554888 (16.237094554670044, 11)
loss 381.5623474121094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 374.2259521484375
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 2 19.0 1210.97093797 (15.828704162850809, 11)
loss 379.3197326660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 373.27520751953125
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 1 20.0 1189.84140354 (15.446694946204717, 10)
loss 378.4217529296875
Current State,action,reward,Response time,Next State:  (10, 15.446694946204717) 3 19.0 1197.01631782 (15.750501603468638, 11)
loss 377.1485900878906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 377.5920715332031
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 4 19.0 1203.91462651 (15.829956988360925, 11)
loss 377.34686279296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 372.2829284667969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 371.9541931152344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 375.6389465332031
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 1 20.0 1213.81868812 (16.017694914042416, 10)
loss 375.82440185546875
Current State,action,reward,Response time,Next State:  (10, 16.017694914042416) 2 20.0 1227.30449265 (15.947547279389703, 10)
loss 377.77703857421875
Current State,action,reward,Response time,Next State:  (10, 15.947547279389703) 3 19.0 1223.58357506 (16.11465619633363, 11)
loss 376.1602478027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 377.77362060546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 381.3511962890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 374.1324462890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 371.1314392089844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 379.7685546875
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 1 20.0 1257.77263112 (16.845818065953559, 10)
loss 375.9461669921875
Current State,action,reward,Response time,Next State:  (10, 16.845818065953559) 2 20.0 1271.23153331 (17.052961248403161, 10)
loss 378.5327453613281
Current State,action,reward,Response time,Next State:  (10, 17.052961248403161) 3 19.0 1282.21925533 (17.215992726625572, 11)
loss 380.36810302734375
############ Running episode number: 112  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 379.9549560546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 374.86181640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 376.1255187988281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 376.4372863769531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 376.6315612792969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 378.28802490234375
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 0 21.0 962.898956888 (11.027107764209074, 9)
loss 374.4773254394531
Current State,action,reward,Response time,Next State:  (9, 11.027107764209074) 3 20.0 1000.33221268 (10.995673623987257, 10)
loss 381.6402282714844
Current State,action,reward,Response time,Next State:  (10, 10.995673623987257) 3 19.0 960.915933313 (10.931193889570471, 11)
loss 373.107666015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 371.66046142578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 374.31988525390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 383.6626892089844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 373.8799743652344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 377.58782958984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 374.5078430175781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 377.78338623046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 375.38763427734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 376.70709228515625
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 4 19.0 922.369964659 (10.448897752470936, 11)
loss 374.6065673828125
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 2 19.0 920.244245637 (10.433149880183072, 11)
loss 376.0228576660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 383.712890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 369.9259033203125
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 2 19.0 916.124940439 (10.42733414151318, 11)
loss 375.5195617675781
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 4 19.0 919.104778396 (10.388469398680568, 11)
loss 373.1319885253906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 379.2358093261719
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 2 19.0 914.701547126 (10.319026962956018, 11)
loss 372.83428955078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 370.4063415527344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 379.46820068359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 373.4853210449219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 377.0904846191406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 374.2372131347656
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 0 21.0 912.67468196 (10.24826025489064, 9)
loss 374.666748046875
Current State,action,reward,Response time,Next State:  (9, 10.24826025489064) 3 20.0 959.563389179 (10.276491935146446, 10)
loss 371.546630859375
Current State,action,reward,Response time,Next State:  (10, 10.276491935146446) 1 21.0 922.767593645 (10.236991269871366, 9)
loss 380.8757629394531
Current State,action,reward,Response time,Next State:  (9, 10.236991269871366) 3 20.0 958.973513426 (10.236272697871373, 10)
loss 375.0900573730469
Current State,action,reward,Response time,Next State:  (10, 10.236272697871373) 3 19.0 920.634200723 (10.369891240151098, 11)
loss 374.74560546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 381.0902099609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 373.9754943847656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 375.2735595703125
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 1 20.0 917.140709305 (10.425974763084863, 10)
loss 373.46331787109375
Current State,action,reward,Response time,Next State:  (10, 10.425974763084863) 2 20.0 930.696774523 (10.546025383098053, 10)
loss 379.1257019042969
Current State,action,reward,Response time,Next State:  (10, 10.546025383098053) 3 19.0 937.064750655 (10.655373370049301, 11)
loss 372.1700744628906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 369.5263977050781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 381.0265808105469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 370.6318054199219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 375.3902893066406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 374.3531188964844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 376.22039794921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 377.4397888183594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 375.37835693359375
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 0 21.0 1012.73322757 (12.501496275411796, 9)
loss 377.9031982421875
Current State,action,reward,Response time,Next State:  (9, 12.501496275411796) 3 20.0 1077.50917513 (13.168618569876575, 10)
loss 378.7415771484375
Current State,action,reward,Response time,Next State:  (10, 13.168618569876575) 0 22.0 1076.17782493 (13.649658108197247, 8)
loss 375.27685546875
Current State,action,reward,Response time,Next State:  (8, 13.649658108197247) 3 21.0 1168.79494995 (14.283719188889453, 9)
loss 376.2461853027344
Current State,action,reward,Response time,Next State:  (9, 14.283719188889453) 1 22.0 1170.79974938 (14.677479537099185, 8)
loss 377.88629150390625
Current State,action,reward,Response time,Next State:  (8, 14.677479537099185) 1 23.0 1228.86576266 (15.353965082180355, 7)
loss 378.85626220703125
Current State,action,reward,Response time,Next State:  (7, 15.353965082180355) 1 24.0 1332.63806181 (15.836943704090487, 6)
loss 377.7298889160156
Current State,action,reward,Response time,Next State:  (6, 15.836943704090487) 2 24.0 1444.84016836 (16.466876895473597, 6)
loss 377.72174072265625
Current State,action,reward,Response time,Next State:  (6, 16.466876895473597) 3 23.0 1486.50338648 (16.871606159345866, 7)
loss 381.3500061035156
Current State,action,reward,Response time,Next State:  (7, 16.871606159345866) 4 21.0 1427.21249257 (17.534967586021782, 9)
loss 378.5426940917969
Current State,action,reward,Response time,Next State:  (9, 17.534967586021782) 3 20.0 1340.98655806 (17.669285735563751, 10)
loss 375.6680603027344
Current State,action,reward,Response time,Next State:  (10, 17.669285735563751) 3 19.0 1314.91162813 (17.944480812078613, 11)
loss 372.21868896484375
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 2 19.0 1316.32685758 (18.385807405229915, 11)
loss 384.95068359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 374.160888671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 374.0018005371094
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 1 20.0 1373.60319427 (19.286321916040979, 10)
loss 375.5152893066406
Current State,action,reward,Response time,Next State:  (10, 19.286321916040979) 3 19.0 1400.68584406 (19.340464848017284, 11)
loss 378.3630676269531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 374.86871337890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 377.336181640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 372.5259094238281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 380.8429260253906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 378.9582824707031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 377.8539123535156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 379.8651428222656
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 0 21.0 1354.56874896 (18.375894992990247, 9)
loss 378.705078125
Current State,action,reward,Response time,Next State:  (9, 18.375894992990247) 3 20.0 1385.00495784 (17.82724819986867, 10)
loss 374.4122314453125
Current State,action,reward,Response time,Next State:  (10, 17.82724819986867) 3 19.0 1323.29060362 (17.229782241685768, 11)
loss 375.88531494140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 374.71514892578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 377.3226013183594
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 0 21.0 1226.10492247 (15.950694610794756, 9)
loss 377.2760314941406
Current State,action,reward,Response time,Next State:  (9, 15.950694610794756) 4 19.0 1258.0576862 (15.828704162850809, 11)
loss 372.42919921875
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 2 19.0 1204.52470225 (15.550833128512703, 11)
loss 374.2632141113281
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 0 21.0 1189.84140354 (15.446694946204717, 9)
loss 379.237060546875
Current State,action,reward,Response time,Next State:  (9, 15.446694946204717) 3 20.0 1231.67579099 (15.750501603468638, 10)
loss 377.20538330078125
Current State,action,reward,Response time,Next State:  (10, 15.750501603468638) 3 19.0 1213.1314661 (15.817158911312735, 11)
loss 382.8323974609375
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 4 19.0 1203.91462651 (15.829956988360925, 11)
loss 379.1707763671875
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 1 20.0 1204.59090422 (15.892373986997768, 10)
loss 380.0359802246094
Current State,action,reward,Response time,Next State:  (10, 15.892373986997768) 3 19.0 1220.65695786 (15.954793861767499, 11)
loss 371.1305236816406
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 0 21.0 1211.18755114 (16.004586266677634, 9)
loss 381.5326232910156
Current State,action,reward,Response time,Next State:  (9, 16.004586266677634) 3 20.0 1260.87864843 (16.017694914042416, 10)
loss 374.3042297363281
Current State,action,reward,Response time,Next State:  (10, 16.017694914042416) 3 19.0 1227.30449265 (15.947547279389703, 11)
loss 374.9093017578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 376.17730712890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 373.9576110839844
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 0 21.0 1221.34827555 (16.229253414601111, 9)
loss 378.33978271484375
Current State,action,reward,Response time,Next State:  (9, 16.229253414601111) 3 20.0 1272.63886489 (16.295120821876548, 10)
loss 372.79718017578125
Current State,action,reward,Response time,Next State:  (10, 16.295120821876548) 3 19.0 1242.02029803 (16.667936385136993, 11)
loss 375.9901123046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 378.15057373046875
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 1 20.0 1257.77263112 (16.845818065953559, 10)
loss 386.313720703125
Current State,action,reward,Response time,Next State:  (10, 16.845818065953559) 3 19.0 1271.23153331 (17.052961248403161, 11)
loss 380.5175476074219
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 1 20.0 1269.21706044 (17.215992726625572, 10)
loss 381.3232421875
############ Running episode number: 113  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 376.1824951171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 375.8799133300781
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 2 19.0 990.920419923 (11.61852219546234, 11)
loss 384.88055419921875
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 1 20.0 982.049698353 (11.469111876584304, 10)
loss 371.8448486328125
Current State,action,reward,Response time,Next State:  (10, 11.469111876584304) 3 19.0 986.02903554 (11.336751742492702, 11)
loss 375.29949951171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 375.8385925292969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 370.6160888671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 382.10015869140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 372.95452880859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 378.97357177734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 383.7854309082031
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 2 19.0 939.812260006 (10.768325938188134, 11)
loss 379.18792724609375
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 0 21.0 937.12351295 (10.772009508959538, 9)
loss 376.45391845703125
Current State,action,reward,Response time,Next State:  (9, 10.772009508959538) 3 20.0 986.979077927 (10.644925616761762, 10)
loss 376.33001708984375
Action +2 not possible so Scaled up by 1
Current State,action,reward,Response time,Next State:  (10, 10.644925616761762) 4 19.0 942.310823749 (10.58735855349979, 11)
loss 375.59759521484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 376.89276123046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 375.3716125488281
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 2 19.0 925.789969445 (10.489125480251131, 11)
loss 378.3912353515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 378.66143798828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 373.6372985839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 378.2309875488281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 371.69708251953125
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 0 21.0 916.124940439 (10.42733414151318, 9)
loss 373.4964599609375
Current State,action,reward,Response time,Next State:  (9, 10.42733414151318) 3 20.0 968.937023414 (10.388469398680568, 10)
loss 379.5670166015625
Current State,action,reward,Response time,Next State:  (10, 10.388469398680568) 3 19.0 928.707336523 (10.344006106602812, 11)
loss 382.5191650390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 376.3699645996094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 378.1541748046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 375.7804260253906
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 2 19.0 911.223233653 (10.268274366284802, 11)
loss 374.5847473144531
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 4 19.0 910.69972028 (10.335411397720526, 11)
loss 373.7563781738281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 379.21319580078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 371.9895935058594
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 2 19.0 909.642131904 (10.276491935146446, 11)
loss 377.70941162109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 374.9371337890625
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 4 19.0 909.046654676 (10.236272697871373, 11)
loss 373.0787658691406
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 4 19.0 909.008683798 (10.369891240151098, 11)
loss 376.32623291015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 381.281982421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 377.251708984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 377.2020568847656
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 4 19.0 917.140709305 (10.425974763084863, 11)
loss 379.38958740234375
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 2 19.0 919.032945938 (10.546025383098053, 11)
loss 375.26824951171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 380.2059020996094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 377.75347900390625
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 2 19.0 929.522052234 (10.771376986314287, 11)
loss 377.1592102050781
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 4 19.0 937.284736847 (10.924797168745895, 11)
loss 380.30072021484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 378.29974365234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 376.02642822265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 383.0117492675781
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 4 19.0 984.787563682 (11.819721938468785, 11)
loss 383.177734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 369.3650817871094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 374.2355651855469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 375.07965087890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 377.25537109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 375.6905517578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 380.6390075683594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 381.3269958496094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 376.8383483886719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 380.4161682128906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 378.33111572265625
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 1 20.0 1259.63387034 (17.534967586021782, 10)
loss 381.0153503417969
Current State,action,reward,Response time,Next State:  (10, 17.534967586021782) 1 21.0 1307.78684385 (17.669285735563751, 9)
loss 373.48492431640625
Current State,action,reward,Response time,Next State:  (9, 17.669285735563751) 3 20.0 1348.01745033 (17.944480812078613, 10)
loss 380.4908142089844
Current State,action,reward,Response time,Next State:  (10, 17.944480812078613) 3 19.0 1329.50910109 (18.385807405229915, 11)
loss 378.9478454589844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 379.4944152832031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 380.15716552734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 374.80401611328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 374.4349365234375
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 2 19.0 1390.09363446 (19.213467265587269, 11)
loss 370.88397216796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 375.4204406738281
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 4 19.0 1379.54110953 (19.385636054792762, 11)
loss 378.1836242675781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 375.49896240234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 382.3055725097656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 381.2602233886719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 375.1737365722656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 383.3051452636719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 376.8245849609375
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 0 21.0 1310.13203606 (17.229782241685768, 9)
loss 374.9269104003906
Current State,action,reward,Response time,Next State:  (9, 17.229782241685768) 3 20.0 1325.01161138 (16.84211602880065, 10)
loss 377.1006774902344
Action +2 not possible so Scaled up by 1
Current State,action,reward,Response time,Next State:  (10, 16.84211602880065) 4 19.0 1271.03516211 (16.237094554670044, 11)
loss 384.8139343261719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 379.927734375
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 1 20.0 1210.97093797 (15.828704162850809, 10)
loss 375.2012023925781
Current State,action,reward,Response time,Next State:  (10, 15.828704162850809) 3 19.0 1217.27964986 (15.550833128512703, 11)
loss 377.0057067871094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 372.5293273925781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 380.3670654296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 379.8002014160156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 373.8462219238281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 379.72271728515625
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 4 19.0 1207.88915169 (15.954793861767499, 11)
loss 375.5106201171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 376.4341125488281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 372.5798034667969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 379.95172119140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 375.1664733886719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 374.1955261230469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 379.5857238769531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 373.2724914550781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 381.7367248535156
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 2 19.0 1248.87152463 (16.836383524612351, 11)
loss 378.83697509765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 375.68511962890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 375.9384765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 376.0846862792969
############ Running episode number: 114  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 375.19805908203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 375.00042724609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 376.21929931640625
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 2 19.0 982.049698353 (11.469111876584304, 11)
loss 379.3405456542969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 375.4079895019531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 383.1972351074219
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 0 21.0 962.898956888 (11.027107764209074, 9)
loss 378.1571044921875
Current State,action,reward,Response time,Next State:  (9, 11.027107764209074) 3 20.0 1000.33221268 (10.995673623987257, 10)
loss 374.85736083984375
Current State,action,reward,Response time,Next State:  (10, 10.995673623987257) 0 22.0 960.915933313 (10.931193889570471, 8)
loss 375.548095703125
Current State,action,reward,Response time,Next State:  (8, 10.931193889570471) 3 21.0 1009.91486598 (10.816918347608043, 9)
loss 384.2757568359375
Current State,action,reward,Response time,Next State:  (9, 10.816918347608043) 1 22.0 989.329834005 (10.819208572963639, 8)
loss 377.9540100097656
Current State,action,reward,Response time,Next State:  (8, 10.819208572963639) 3 21.0 1003.36990711 (10.768325938188134, 9)
loss 383.48590087890625
Current State,action,reward,Response time,Next State:  (9, 10.768325938188134) 3 20.0 986.786261176 (10.772009508959538, 10)
loss 384.6009521484375
Current State,action,reward,Response time,Next State:  (10, 10.772009508959538) 3 19.0 949.051873418 (10.644925616761762, 11)
loss 381.3052978515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 376.17095947265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 377.21380615234375
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 1 20.0 925.738299342 (10.553846649940214, 10)
loss 369.5442810058594
Current State,action,reward,Response time,Next State:  (10, 10.553846649940214) 3 19.0 937.479622653 (10.489125480251131, 11)
loss 386.6697082519531
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 4 19.0 922.369964659 (10.448897752470936, 11)
loss 375.60186767578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 373.4847412109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 382.7376708984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 376.49114990234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 374.28466796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 376.16461181640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 372.114990234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 374.8041076660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 372.8020324707031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 375.5263977050781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 376.12359619140625
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 0 21.0 910.69972028 (10.335411397720526, 9)
loss 373.3081359863281
Current State,action,reward,Response time,Next State:  (9, 10.335411397720526) 2 21.0 964.125321415 (10.305649118067803, 9)
loss 381.6910705566406
Current State,action,reward,Response time,Next State:  (9, 10.305649118067803) 1 22.0 962.567412952 (10.24826025489064, 8)
loss 379.9442138671875
Current State,action,reward,Response time,Next State:  (8, 10.24826025489064) 3 21.0 970.000949704 (10.276491935146446, 9)
loss 375.4178161621094
Current State,action,reward,Response time,Next State:  (9, 10.276491935146446) 3 20.0 961.041178317 (10.236991269871366, 10)
loss 374.3474426269531
Action +2 not possible so Scaled up by 1
Current State,action,reward,Response time,Next State:  (10, 10.236991269871366) 4 19.0 920.672316722 (10.236272697871373, 11)
loss 373.9024963378906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 372.2476501464844
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 1 20.0 916.069372847 (10.316955310454549, 10)
loss 382.178466796875
Current State,action,reward,Response time,Next State:  (10, 10.316955310454549) 3 19.0 924.913936648 (10.333617326102203, 11)
loss 378.58099365234375
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 1 20.0 914.152581784 (10.390165524255663, 10)
loss 378.57904052734375
Current State,action,reward,Response time,Next State:  (10, 10.390165524255663) 3 19.0 928.797305964 (10.425974763084863, 11)
loss 374.3290100097656
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 1 20.0 919.032945938 (10.546025383098053, 10)
loss 377.3631896972656
Current State,action,reward,Response time,Next State:  (10, 10.546025383098053) 0 22.0 937.064750655 (10.655373370049301, 8)
loss 378.7599182128906
Current State,action,reward,Response time,Next State:  (8, 10.655373370049301) 3 21.0 993.794592261 (10.624473674922116, 9)
loss 377.3820495605469
Current State,action,reward,Response time,Next State:  (9, 10.624473674922116) 2 21.0 979.256305105 (10.771376986314287, 9)
loss 374.18328857421875
Current State,action,reward,Response time,Next State:  (9, 10.771376986314287) 3 20.0 986.945968488 (10.924797168745895, 10)
loss 378.1642150878906
Current State,action,reward,Response time,Next State:  (10, 10.924797168745895) 3 19.0 957.1563561 (11.039747673816453, 11)
loss 368.0159912109375
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 1 20.0 951.466016946 (11.271571944085663, 10)
loss 377.7971496582031
Current State,action,reward,Response time,Next State:  (10, 11.271571944085663) 3 19.0 975.550709187 (11.670334358779868, 11)
loss 377.5250244140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 379.51409912109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 377.27117919921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 372.1418151855469
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 4 19.0 1028.70793389 (13.168618569876575, 11)
loss 383.3236083984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 376.5024108886719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 380.4438171386719
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 1 20.0 1122.88439768 (14.677479537099185, 10)
loss 376.36669921875
Current State,action,reward,Response time,Next State:  (10, 14.677479537099185) 2 20.0 1156.21398489 (15.353965082180355, 10)
loss 379.6322326660156
Current State,action,reward,Response time,Next State:  (10, 15.353965082180355) 3 19.0 1192.09754638 (15.836943704090487, 11)
loss 376.92523193359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 381.80828857421875
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 2 19.0 1238.24711194 (16.871606159345866, 11)
loss 379.4869079589844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 383.2822570800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 373.4901123046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 372.414794921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 376.20751953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 378.3007507324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 377.1374206542969
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 0 21.0 1373.60319427 (19.286321916040979, 9)
loss 379.6006774902344
Current State,action,reward,Response time,Next State:  (9, 19.286321916040979) 3 20.0 1432.66131431 (19.340464848017284, 10)
loss 376.7052917480469
Current State,action,reward,Response time,Next State:  (10, 19.340464848017284) 1 21.0 1403.55780672 (19.213467265587269, 9)
loss 372.88519287109375
Current State,action,reward,Response time,Next State:  (9, 19.213467265587269) 3 20.0 1428.84773289 (19.140765783401285, 10)
loss 379.3405456542969
Current State,action,reward,Response time,Next State:  (10, 19.140765783401285) 3 19.0 1392.96495117 (19.385636054792762, 11)
loss 376.9219970703125
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 4 19.0 1392.48057747 (19.223969507401588, 11)
loss 385.3508605957031
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 1 20.0 1383.93777195 (19.25591252280865, 10)
loss 382.0426940917969
Current State,action,reward,Response time,Next State:  (10, 19.25591252280865) 3 19.0 1399.0728054 (19.08360399753829, 11)
loss 373.5736083984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 375.9110107421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 374.7442932128906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 378.956298828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 382.66021728515625
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 0 21.0 1278.56065924 (16.84211602880065, 9)
loss 374.0380554199219
Current State,action,reward,Response time,Next State:  (9, 16.84211602880065) 3 20.0 1304.71919827 (16.237094554670044, 10)
loss 380.43994140625
Current State,action,reward,Response time,Next State:  (10, 16.237094554670044) 3 19.0 1238.94234737 (15.950694610794756, 11)
loss 370.7283020019531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 378.8818359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 375.68218994140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 373.5631103515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 377.42578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 376.8524475097656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 378.1637268066406
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 4 19.0 1204.59090422 (15.892373986997768, 11)
loss 375.68487548828125
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 2 19.0 1207.88915169 (15.954793861767499, 11)
loss 370.8323974609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 378.83819580078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 382.2987365722656
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 4 19.0 1214.51137704 (15.947547279389703, 11)
loss 377.6236877441406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 375.45269775390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 380.8295593261719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 378.64892578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 375.5465393066406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 382.0587463378906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 379.1203918457031
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 1 20.0 1257.77263112 (16.845818065953559, 10)
loss 374.0118713378906
Current State,action,reward,Response time,Next State:  (10, 16.845818065953559) 3 19.0 1271.23153331 (17.052961248403161, 11)
loss 377.68817138671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 374.89813232421875
############ Running episode number: 115  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 379.53094482421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 373.1371765136719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 376.9454040527344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 371.26904296875
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 1 20.0 974.154538113 (11.336751742492702, 10)
loss 377.4803161621094
Current State,action,reward,Response time,Next State:  (10, 11.336751742492702) 3 19.0 979.00811241 (11.25610796929319, 11)
loss 377.3172302246094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 374.9049377441406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 373.1046142578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 372.21514892578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 377.91546630859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 372.4772033691406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 382.07965087890625
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 2 19.0 937.12351295 (10.772009508959538, 11)
loss 373.0075988769531
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 2 19.0 937.318160694 (10.644925616761762, 11)
loss 380.1180725097656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 375.6260681152344
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 1 20.0 927.560809977 (10.552868829802469, 10)
loss 379.3503723144531
Current State,action,reward,Response time,Next State:  (10, 10.552868829802469) 3 19.0 937.427755072 (10.553846649940214, 11)
loss 374.4302062988281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 378.1413269042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 376.4959716796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 370.4669494628906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 376.5865173339844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 378.55535888671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 373.3989562988281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 374.09405517578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 378.4723205566406
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 2 19.0 914.701547126 (10.319026962956018, 11)
loss 377.8111877441406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 376.2252502441406
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 1 20.0 912.494916918 (10.278181486298042, 10)
loss 376.1361389160156
Current State,action,reward,Response time,Next State:  (10, 10.278181486298042) 3 19.0 922.857214352 (10.268274366284802, 11)
loss 376.7369079589844
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 4 19.0 910.69972028 (10.335411397720526, 11)
loss 384.4206848144531
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 0 21.0 914.247384359 (10.305649118067803, 9)
loss 380.902587890625
Current State,action,reward,Response time,Next State:  (9, 10.305649118067803) 3 20.0 962.567412952 (10.24826025489064, 10)
loss 378.0499267578125
Current State,action,reward,Response time,Next State:  (10, 10.24826025489064) 3 19.0 921.2700698 (10.276491935146446, 11)
loss 371.4529113769531
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 1 20.0 911.133954163 (10.236991269871366, 10)
loss 370.4402160644531
Current State,action,reward,Response time,Next State:  (10, 10.236991269871366) 3 19.0 920.672316722 (10.236272697871373, 11)
loss 374.3303527832031
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 0 21.0 909.008683798 (10.369891240151098, 9)
loss 374.95196533203125
Current State,action,reward,Response time,Next State:  (9, 10.369891240151098) 0 23.0 965.930171009 (10.316955310454549, 7)
loss 374.6064758300781
Current State,action,reward,Response time,Next State:  (7, 10.316955310454549) 3 22.0 1018.74807882 (10.333617326102203, 8)
loss 378.265380859375
Current State,action,reward,Response time,Next State:  (8, 10.333617326102203) 3 21.0 974.989626232 (10.390165524255663, 9)
loss 377.47198486328125
Current State,action,reward,Response time,Next State:  (9, 10.390165524255663) 3 20.0 966.991429728 (10.425974763084863, 10)
loss 373.379638671875
Current State,action,reward,Response time,Next State:  (10, 10.425974763084863) 3 19.0 930.696774523 (10.546025383098053, 11)
loss 374.3702392578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 378.7778625488281
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 1 20.0 931.154858096 (10.624473674922116, 10)
loss 373.9558410644531
Current State,action,reward,Response time,Next State:  (10, 10.624473674922116) 3 19.0 941.225969064 (10.771376986314287, 11)
loss 376.0521545410156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 378.9098815917969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 370.9927673339844
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 1 20.0 951.466016946 (11.271571944085663, 10)
loss 381.6907653808594
Current State,action,reward,Response time,Next State:  (10, 11.271571944085663) 3 19.0 975.550709187 (11.670334358779868, 11)
loss 375.2790222167969
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 1 20.0 984.787563682 (11.819721938468785, 10)
loss 375.94158935546875
Current State,action,reward,Response time,Next State:  (10, 11.819721938468785) 3 19.0 1004.62682792 (12.19918626616789, 11)
loss 374.3087463378906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 374.190673828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 371.7453308105469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 376.994873046875
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 1 20.0 1089.37925646 (14.283719188889453, 10)
loss 372.60223388671875
Current State,action,reward,Response time,Next State:  (10, 14.283719188889453) 3 19.0 1135.32732476 (14.677479537099185, 11)
loss 372.78167724609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 373.42230224609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 371.1962585449219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 372.1307678222656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 380.65985107421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 376.9599304199219
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 1 20.0 1294.6873044 (17.669285735563751, 10)
loss 373.4884338378906
Current State,action,reward,Response time,Next State:  (10, 17.669285735563751) 3 19.0 1314.91162813 (17.944480812078613, 11)
loss 373.9980773925781
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 4 19.0 1316.32685758 (18.385807405229915, 11)
loss 378.068115234375
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 4 19.0 1339.64749699 (18.671267839956315, 11)
loss 375.7068176269531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 371.0510559082031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 378.9311828613281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 376.2997741699219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 379.28363037109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 374.9149169921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 376.4631652832031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 371.8067932128906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 374.0828857421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 375.60797119140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 375.193359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 373.9974060058594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 384.865234375
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 0 21.0 1310.13203606 (17.229782241685768, 9)
loss 375.1166076660156
Current State,action,reward,Response time,Next State:  (9, 17.229782241685768) 3 20.0 1325.01161138 (16.84211602880065, 10)
loss 374.00799560546875
Current State,action,reward,Response time,Next State:  (10, 16.84211602880065) 3 19.0 1271.03516211 (16.237094554670044, 11)
loss 374.9026184082031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 377.80059814453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 381.033203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 373.214599609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 379.67584228515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 375.722412109375
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 0 21.0 1200.39231205 (15.817158911312735, 9)
loss 372.3995666503906
Current State,action,reward,Response time,Next State:  (9, 15.817158911312735) 1 22.0 1251.06775133 (15.829956988360925, 8)
loss 370.8086242675781
Current State,action,reward,Response time,Next State:  (8, 15.829956988360925) 4 20.0 1296.22207104 (15.892373986997768, 10)
loss 379.2830505371094
Current State,action,reward,Response time,Next State:  (10, 15.892373986997768) 3 19.0 1220.65695786 (15.954793861767499, 11)
loss 373.8975524902344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 380.44329833984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 380.5354919433594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 377.1257019042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 373.5630187988281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 370.3798828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 371.8841247558594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 373.3960266113281
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 1 20.0 1229.17115431 (16.667936385136993, 10)
loss 374.7054443359375
Current State,action,reward,Response time,Next State:  (10, 16.667936385136993) 2 20.0 1261.79596106 (16.836383524612351, 10)
loss 374.2295837402344
Current State,action,reward,Response time,Next State:  (10, 16.836383524612351) 3 19.0 1270.73108663 (16.845818065953559, 11)
loss 370.0738220214844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 371.72161865234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 372.0329895019531
############ Running episode number: 116  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 378.86810302734375
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 1 20.0 1000.80824137 (11.786394321941378, 10)
loss 367.798828125
Current State,action,reward,Response time,Next State:  (10, 11.786394321941378) 3 19.0 1002.85899476 (11.61852219546234, 11)
loss 382.558349609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 372.34698486328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 371.73345947265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 371.4241027832031
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 0 21.0 962.898956888 (11.027107764209074, 9)
loss 374.150634765625
Current State,action,reward,Response time,Next State:  (9, 11.027107764209074) 3 20.0 1000.33221268 (10.995673623987257, 10)
loss 371.55767822265625
Current State,action,reward,Response time,Next State:  (10, 10.995673623987257) 3 19.0 960.915933313 (10.931193889570471, 11)
loss 372.26300048828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 375.48834228515625
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 1 20.0 939.691239608 (10.819208572963639, 10)
loss 371.397705078125
Current State,action,reward,Response time,Next State:  (10, 10.819208572963639) 3 19.0 951.555504911 (10.768325938188134, 11)
loss 380.8542175292969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 378.7724304199219
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 2 19.0 937.318160694 (10.644925616761762, 11)
loss 371.3462219238281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 374.7281494140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 370.0801696777344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 372.8479309082031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 380.33038330078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 376.03668212890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 378.39752197265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 374.6065673828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 377.109619140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 370.96307373046875
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 1 20.0 919.104778396 (10.388469398680568, 10)
loss 376.17120361328125
Current State,action,reward,Response time,Next State:  (10, 10.388469398680568) 3 19.0 928.707336523 (10.344006106602812, 11)
loss 374.8096618652344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 376.0082702636719
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 0 21.0 913.381595845 (10.30224719189987, 9)
loss 377.58038330078125
Current State,action,reward,Response time,Next State:  (9, 10.30224719189987) 3 20.0 962.389338906 (10.278181486298042, 10)
loss 368.2979431152344
Current State,action,reward,Response time,Next State:  (10, 10.278181486298042) 3 19.0 922.857214352 (10.268274366284802, 11)
loss 372.4774169921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 373.43450927734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 378.3046569824219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 373.51507568359375
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 4 19.0 909.642131904 (10.276491935146446, 11)
loss 373.0770568847656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 373.218994140625
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 2 19.0 909.046654676 (10.236272697871373, 11)
loss 375.1669921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 376.3919982910156
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 4 19.0 916.069372847 (10.316955310454549, 11)
loss 371.74078369140625
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 1 20.0 913.272125304 (10.333617326102203, 10)
loss 373.56964111328125
Current State,action,reward,Response time,Next State:  (10, 10.333617326102203) 3 19.0 925.797758139 (10.390165524255663, 11)
loss 373.21002197265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 375.5931701660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 387.2835693359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 376.6832275390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 371.56927490234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 382.21697998046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 374.3160705566406
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 4 19.0 945.391786838 (11.039747673816453, 11)
loss 370.0556640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 380.24090576171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 368.8651123046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 374.4369201660156
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 4 19.0 992.681522335 (12.19918626616789, 11)
loss 372.6219787597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 379.5709533691406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 384.6085510253906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 376.81671142578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 377.6116943359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 370.8765869140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 373.2906494140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 372.9150695800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 376.0582580566406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 374.1345520019531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 372.8223876953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 375.9065856933594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 374.9326171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 371.77972412109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 370.9788818359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 370.8502197265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 373.19659423828125
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 1 20.0 1387.23260634 (19.340464848017284, 10)
loss 371.6375427246094
Current State,action,reward,Response time,Next State:  (10, 19.340464848017284) 3 19.0 1403.55780672 (19.213467265587269, 11)
loss 371.7974853515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 373.925537109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 376.424560546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 377.74285888671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 371.3550720214844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 374.31414794921875
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 0 21.0 1376.52055872 (18.668181536495972, 9)
loss 371.6180419921875
Current State,action,reward,Response time,Next State:  (9, 18.668181536495972) 3 20.0 1400.30471596 (18.375894992990247, 10)
loss 376.4648132324219
Current State,action,reward,Response time,Next State:  (10, 18.375894992990247) 3 19.0 1352.39307459 (17.82724819986867, 11)
loss 372.03228759765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 369.2544860839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 371.1368713378906
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 2 19.0 1258.07554888 (16.237094554670044, 11)
loss 369.7104797363281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 373.39691162109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 376.3283386230469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 372.3373107910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 378.3398132324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 379.6012268066406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 370.4529724121094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 376.7564697265625
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 4 19.0 1204.59090422 (15.892373986997768, 11)
loss 371.3083801269531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 371.0093078613281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 373.3542175292969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 374.7912902832031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 374.96710205078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 371.8728942871094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 365.7420654296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 366.88299560546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 373.9437561035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 378.9415283203125
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 2 19.0 1248.87152463 (16.836383524612351, 11)
loss 377.7021789550781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 371.35443115234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 373.2303466796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 374.6258544921875
############ Running episode number: 117  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 371.12445068359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 374.8807067871094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 370.25689697265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 370.9938049316406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 376.2408142089844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 380.0362548828125
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 2 19.0 962.898956888 (11.027107764209074, 11)
loss 374.66473388671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 378.9122619628906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 369.50384521484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 374.1594543457031
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 2 19.0 939.691239608 (10.819208572963639, 11)
loss 381.1231689453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 375.77294921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 379.74041748046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 370.95654296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 371.2568359375
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 2 19.0 927.560809977 (10.552868829802469, 11)
loss 375.8233947753906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 374.03692626953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 374.0043640136719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 382.32720947265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 373.3583984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 371.2608642578125
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 1 20.0 919.871906942 (10.370942817486826, 10)
loss 378.83013916015625
Current State,action,reward,Response time,Next State:  (10, 10.370942817486826) 1 21.0 927.777654938 (10.42733414151318, 9)
loss 376.2578125
Current State,action,reward,Response time,Next State:  (9, 10.42733414151318) 3 20.0 968.937023414 (10.388469398680568, 10)
loss 371.863525390625
Current State,action,reward,Response time,Next State:  (10, 10.388469398680568) 1 21.0 928.707336523 (10.344006106602812, 9)
loss 375.9706115722656
Current State,action,reward,Response time,Next State:  (9, 10.344006106602812) 3 20.0 964.575212011 (10.319026962956018, 10)
loss 368.51141357421875
Current State,action,reward,Response time,Next State:  (10, 10.319026962956018) 3 19.0 925.023825574 (10.30224719189987, 11)
loss 373.83380126953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 379.3992004394531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 378.80352783203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 372.4871826171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 374.2945251464844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 374.8843994140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 369.0583801269531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 375.9619140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 372.70721435546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 378.1932373046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 371.63861083984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 376.12115478515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 371.96527099609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 372.7694091796875
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 4 19.0 919.032945938 (10.546025383098053, 11)
loss 371.95013427734375
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 4 19.0 925.376677007 (10.655373370049301, 11)
loss 374.43988037109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 372.94012451171875
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 2 19.0 929.522052234 (10.771376986314287, 11)
loss 374.11016845703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 374.3343505859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 373.95916748046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 371.7415466308594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 378.4699401855469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 374.7880554199219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 378.7508239746094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 374.8052978515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 372.78271484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 370.56268310546875
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 0 21.0 1089.37925646 (14.283719188889453, 9)
loss 374.4912414550781
Current State,action,reward,Response time,Next State:  (9, 14.283719188889453) 3 20.0 1170.79974938 (14.677479537099185, 10)
loss 364.48419189453125
Current State,action,reward,Response time,Next State:  (10, 14.677479537099185) 3 19.0 1156.21398489 (15.353965082180355, 11)
loss 375.5426330566406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 377.62176513671875
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 1 20.0 1204.9600972 (16.466876895473597, 10)
loss 376.531494140625
Current State,action,reward,Response time,Next State:  (10, 16.466876895473597) 3 19.0 1251.130943 (16.871606159345866, 11)
loss 379.4982604980469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 372.3807067871094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 373.59393310546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 371.8866882324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 369.9228515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 372.9384765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 370.28271484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 377.5718994140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 370.5932312011719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 375.5106506347656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 384.6360168457031
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 4 19.0 1379.54110953 (19.385636054792762, 11)
loss 372.31402587890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 377.257080078125
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 4 19.0 1383.93777195 (19.25591252280865, 11)
loss 371.3447570800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 374.79095458984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 378.63824462890625
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 4 19.0 1354.56874896 (18.375894992990247, 11)
loss 374.1058044433594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 377.2525634765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 370.1834716796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 375.7420959472656
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 0 21.0 1258.07554888 (16.237094554670044, 9)
loss 373.3844909667969
Current State,action,reward,Response time,Next State:  (9, 16.237094554670044) 2 21.0 1273.04930988 (15.950694610794756, 9)
loss 373.0090026855469
Current State,action,reward,Response time,Next State:  (9, 15.950694610794756) 3 20.0 1258.0576862 (15.828704162850809, 10)
loss 379.7997131347656
Current State,action,reward,Response time,Next State:  (10, 15.828704162850809) 3 19.0 1217.27964986 (15.550833128512703, 11)
loss 377.58782958984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 374.24359130859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 373.9444580078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 371.85394287109375
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 2 19.0 1203.91462651 (15.829956988360925, 11)
loss 372.5532531738281
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 0 21.0 1204.59090422 (15.892373986997768, 9)
loss 373.2492370605469
Current State,action,reward,Response time,Next State:  (9, 15.892373986997768) 3 20.0 1255.00488935 (15.954793861767499, 10)
loss 377.214599609375
Current State,action,reward,Response time,Next State:  (10, 15.954793861767499) 3 19.0 1223.96796344 (16.004586266677634, 11)
loss 370.4761962890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 374.9299011230469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 367.6168212890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 376.5195617675781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 375.3922424316406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 373.07489013671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 368.497314453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 367.3623962402344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 373.2168884277344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 370.10479736328125
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 4 19.0 1258.27117243 (17.052961248403161, 11)
loss 381.63128662109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 374.03045654296875
############ Running episode number: 118  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 2 20.0 1029.06659875 (11.973514343585284, 10)
loss 373.5137634277344
Current State,action,reward,Response time,Next State:  (10, 11.973514343585284) 3 19.0 1012.7846064 (11.786394321941378, 11)
loss 374.34478759765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 370.7517395019531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 375.41015625
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 1 20.0 974.154538113 (11.336751742492702, 10)
loss 376.411865234375
Action +2 not possible so Scaled up by 1
Current State,action,reward,Response time,Next State:  (10, 11.336751742492702) 4 19.0 979.00811241 (11.25610796929319, 11)
loss 376.72808837890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 364.9859619140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 375.1816101074219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 377.4108581542969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 376.0660705566406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 370.3076171875
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 1 20.0 939.812260006 (10.768325938188134, 10)
loss 372.43756103515625
Action +2 not possible so Scaled up by 1
Current State,action,reward,Response time,Next State:  (10, 10.768325938188134) 4 19.0 948.856481751 (10.772009508959538, 11)
loss 370.15838623046875
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 1 20.0 937.318160694 (10.644925616761762, 10)
loss 379.2200927734375
Current State,action,reward,Response time,Next State:  (10, 10.644925616761762) 3 19.0 942.310823749 (10.58735855349979, 11)
loss 371.2702331542969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 375.7890930175781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 369.4098815917969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 371.175048828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 378.25885009765625
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 2 19.0 920.244245637 (10.433149880183072, 11)
loss 375.24493408203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 370.73431396484375
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 2 19.0 919.871906942 (10.370942817486826, 11)
loss 376.0123596191406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 370.67376708984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 374.34051513671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 373.4852600097656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 369.4775695800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 374.30633544921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 371.4410095214844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 376.7427978515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 372.0547790527344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 377.40625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 373.67034912109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 365.6978454589844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 374.8281555175781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 369.2660217285156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 378.06353759765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 368.3742370605469
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 2 19.0 913.272125304 (10.333617326102203, 11)
loss 374.7247619628906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 380.6770324707031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 375.98291015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 373.86297607421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 373.21954345703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 374.91937255859375
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 0 21.0 929.522052234 (10.771376986314287, 9)
loss 370.41278076171875
Current State,action,reward,Response time,Next State:  (9, 10.771376986314287) 3 20.0 986.945968488 (10.924797168745895, 10)
loss 372.2783508300781
Current State,action,reward,Response time,Next State:  (10, 10.924797168745895) 3 19.0 957.1563561 (11.039747673816453, 11)
loss 374.6239929199219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 369.015380859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 375.5993957519531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 374.3228454589844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 373.619140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 373.9865417480469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 372.03118896484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 371.6751708984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 375.68316650390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 369.1569519042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 377.4285583496094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 378.1591796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 369.2995910644531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 372.3064880371094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 378.1387023925781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 374.0698547363281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 371.0812072753906
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 2 19.0 1316.32685758 (18.385807405229915, 11)
loss 373.6322937011719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 379.663330078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 378.23785400390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 375.05804443359375
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 0 21.0 1387.23260634 (19.340464848017284, 9)
loss 373.8188171386719
Current State,action,reward,Response time,Next State:  (9, 19.340464848017284) 3 20.0 1435.4954296 (19.213467265587269, 10)
loss 366.02581787109375
Current State,action,reward,Response time,Next State:  (10, 19.213467265587269) 3 19.0 1396.82133527 (19.140765783401285, 11)
loss 372.6103820800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 374.3787841796875
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 1 20.0 1392.48057747 (19.223969507401588, 10)
loss 369.687744140625
Current State,action,reward,Response time,Next State:  (10, 19.223969507401588) 3 19.0 1397.37841716 (19.25591252280865, 11)
loss 370.2781677246094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 368.6189270019531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 371.11785888671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 373.11834716796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 376.07257080078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 371.8705749511719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 376.4350280761719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 369.92431640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 370.97747802734375
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 4 19.0 1210.97093797 (15.828704162850809, 11)
loss 381.0997619628906
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 1 20.0 1204.52470225 (15.550833128512703, 10)
loss 370.2468566894531
Current State,action,reward,Response time,Next State:  (10, 15.550833128512703) 3 19.0 1202.54023315 (15.446694946204717, 11)
loss 375.8032531738281
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 4 19.0 1184.33851965 (15.750501603468638, 11)
loss 374.5613708496094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 370.77398681640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 372.3922424316406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 372.3835754394531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 367.7015075683594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 370.00164794921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 372.17645263671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 372.07684326171875
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 2 19.0 1210.80462626 (16.11465619633363, 11)
loss 375.6835021972656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 373.42230224609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 374.3157958984375
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 0 21.0 1225.69057988 (16.295120821876548, 9)
loss 370.87921142578125
Current State,action,reward,Response time,Next State:  (9, 16.295120821876548) 3 20.0 1276.0866986 (16.667936385136993, 10)
loss 371.62762451171875
Current State,action,reward,Response time,Next State:  (10, 16.667936385136993) 3 19.0 1261.79596106 (16.836383524612351, 11)
loss 373.55804443359375
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 1 20.0 1257.77263112 (16.845818065953559, 10)
loss 377.7102355957031
Action +2 not possible so Scaled up by 1
Current State,action,reward,Response time,Next State:  (10, 16.845818065953559) 4 19.0 1271.23153331 (17.052961248403161, 11)
loss 368.4325866699219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 367.7945861816406
############ Running episode number: 119  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 373.1709289550781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 376.4305114746094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 368.55206298828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 368.9857482910156
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 2 19.0 974.154538113 (11.336751742492702, 11)
loss 369.6966552734375
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 1 20.0 967.160346038 (11.25610796929319, 10)
loss 379.51849365234375
Current State,action,reward,Response time,Next State:  (10, 11.25610796929319) 1 21.0 974.730436685 (11.027107764209074, 9)
loss 370.90850830078125
Current State,action,reward,Response time,Next State:  (9, 11.027107764209074) 3 20.0 1000.33221268 (10.995673623987257, 10)
loss 375.9466247558594
Current State,action,reward,Response time,Next State:  (10, 10.995673623987257) 3 19.0 960.915933313 (10.931193889570471, 11)
loss 368.052734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 368.72735595703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 371.82867431640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 371.7171936035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 372.5730285644531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 374.7244567871094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 371.4912414550781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 374.6823425292969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 370.6488342285156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 376.8735046386719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 371.4988098144531
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 2 19.0 920.244245637 (10.433149880183072, 11)
loss 367.8762512207031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 368.0615234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 372.3033142089844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 370.0046081542969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 373.79437255859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 369.7711181640625
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 1 20.0 914.701547126 (10.319026962956018, 10)
loss 378.13623046875
Current State,action,reward,Response time,Next State:  (10, 10.319026962956018) 3 19.0 925.023825574 (10.30224719189987, 11)
loss 371.8639831542969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 376.3858337402344
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 4 19.0 911.223233653 (10.268274366284802, 11)
loss 373.84375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 371.6461181640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 369.9631042480469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 377.2823181152344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 374.3404541015625
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 1 20.0 911.133954163 (10.236991269871366, 10)
loss 373.2341613769531
Current State,action,reward,Response time,Next State:  (10, 10.236991269871366) 3 19.0 920.672316722 (10.236272697871373, 11)
loss 369.07757568359375
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 4 19.0 909.008683798 (10.369891240151098, 11)
loss 369.9981994628906
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 2 19.0 916.069372847 (10.316955310454549, 11)
loss 369.2037048339844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 373.0667724609375
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 1 20.0 914.152581784 (10.390165524255663, 10)
loss 374.1575012207031
Current State,action,reward,Response time,Next State:  (10, 10.390165524255663) 3 19.0 928.797305964 (10.425974763084863, 11)
loss 375.40093994140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 368.48931884765625
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 0 21.0 925.376677007 (10.655373370049301, 9)
loss 370.3487243652344
Current State,action,reward,Response time,Next State:  (9, 10.655373370049301) 3 20.0 980.873751654 (10.624473674922116, 10)
loss 370.89312744140625
Current State,action,reward,Response time,Next State:  (10, 10.624473674922116) 3 19.0 941.225969064 (10.771376986314287, 11)
loss 368.8571472167969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 377.20318603515625
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 4 19.0 945.391786838 (11.039747673816453, 11)
loss 371.3367919921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 374.0973815917969
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 0 21.0 963.716106332 (11.670334358779868, 9)
loss 374.2089538574219
Current State,action,reward,Response time,Next State:  (9, 11.670334358779868) 1 22.0 1034.00195058 (11.819721938468785, 8)
loss 378.7731628417969
Current State,action,reward,Response time,Next State:  (8, 11.819721938468785) 3 21.0 1061.84470565 (12.19918626616789, 9)
loss 372.6090393066406
Current State,action,reward,Response time,Next State:  (9, 12.19918626616789) 3 20.0 1061.68473805 (12.501496275411796, 10)
loss 373.1244201660156
Current State,action,reward,Response time,Next State:  (10, 12.501496275411796) 3 19.0 1040.79092857 (13.168618569876575, 11)
loss 369.7542419433594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 367.5685119628906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 370.2250061035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 371.9730224609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 375.11578369140625
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 4 19.0 1179.43847566 (15.836943704090487, 11)
loss 369.064208984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 369.94586181640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 378.49139404296875
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 0 21.0 1259.63387034 (17.534967586021782, 9)
loss 370.765869140625
Current State,action,reward,Response time,Next State:  (9, 17.534967586021782) 3 20.0 1340.98655806 (17.669285735563751, 10)
loss 384.5008239746094
Action +2 not possible so Scaled up by 1
Current State,action,reward,Response time,Next State:  (10, 17.669285735563751) 4 19.0 1314.91162813 (17.944480812078613, 11)
loss 372.1615905761719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 369.6059875488281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 369.9992980957031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 372.78997802734375
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 1 20.0 1373.60319427 (19.286321916040979, 10)
loss 372.5691223144531
Current State,action,reward,Response time,Next State:  (10, 19.286321916040979) 3 19.0 1400.68584406 (19.340464848017284, 11)
loss 368.46514892578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 374.2692565917969
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 0 21.0 1383.38281107 (19.140765783401285, 9)
loss 369.02288818359375
Current State,action,reward,Response time,Next State:  (9, 19.140765783401285) 3 20.0 1425.04216908 (19.385636054792762, 10)
loss 373.8557434082031
Current State,action,reward,Response time,Next State:  (10, 19.385636054792762) 0 22.0 1405.95387237 (19.223969507401588, 8)
loss 378.735107421875
Current State,action,reward,Response time,Next State:  (8, 19.223969507401588) 3 21.0 1494.58443695 (19.25591252280865, 9)
loss 368.70709228515625
Current State,action,reward,Response time,Next State:  (9, 19.25591252280865) 4 19.0 1431.06953264 (19.08360399753829, 11)
loss 373.6819763183594
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 2 19.0 1376.52055872 (18.668181536495972, 11)
loss 370.24871826171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 369.8119812011719
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 2 19.0 1339.12370397 (17.82724819986867, 11)
loss 373.83660888671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 373.12847900390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 371.26806640625
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 1 20.0 1258.07554888 (16.237094554670044, 10)
loss 372.44921875
Current State,action,reward,Response time,Next State:  (10, 16.237094554670044) 3 19.0 1238.94234737 (15.950694610794756, 11)
loss 375.1690673828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 370.9804382324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 370.50079345703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 373.9066162109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 376.744140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 371.3687438964844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 374.3039245605469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 373.74261474609375
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 2 19.0 1207.88915169 (15.954793861767499, 11)
loss 373.6416320800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 372.4156188964844
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 0 21.0 1213.81868812 (16.017694914042416, 9)
loss 369.5331115722656
Current State,action,reward,Response time,Next State:  (9, 16.017694914042416) 3 20.0 1261.56482143 (15.947547279389703, 10)
loss 379.1044616699219
Current State,action,reward,Response time,Next State:  (10, 15.947547279389703) 3 19.0 1223.58357506 (16.11465619633363, 11)
loss 363.58349609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 371.8634948730469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 369.714111328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 371.1903076171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 375.11737060546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 377.3074951171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 370.4687805175781
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 1 20.0 1258.27117243 (17.052961248403161, 10)
loss 377.3810729980469
Current State,action,reward,Response time,Next State:  (10, 17.052961248403161) 3 19.0 1282.21925533 (17.215992726625572, 11)
loss 366.81439208984375
############ Running episode number: 120  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 376.9810791015625
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 1 20.0 1000.80824137 (11.786394321941378, 10)
loss 376.6691589355469
Current State,action,reward,Response time,Next State:  (10, 11.786394321941378) 3 19.0 1002.85899476 (11.61852219546234, 11)
loss 379.0237121582031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 374.8211669921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 371.23748779296875
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 0 21.0 967.160346038 (11.25610796929319, 9)
loss 371.30755615234375
Current State,action,reward,Response time,Next State:  (9, 11.25610796929319) 3 20.0 1012.3192433 (11.027107764209074, 10)
loss 369.9278869628906
Current State,action,reward,Response time,Next State:  (10, 11.027107764209074) 3 19.0 962.583328739 (10.995673623987257, 11)
loss 375.8152160644531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 376.35302734375
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 4 19.0 945.729803224 (10.816918347608043, 11)
loss 374.647216796875
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 1 20.0 939.691239608 (10.819208572963639, 10)
loss 371.93890380859375
Current State,action,reward,Response time,Next State:  (10, 10.819208572963639) 3 19.0 951.555504911 (10.768325938188134, 11)
loss 368.7900085449219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 369.5662536621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 369.46240234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 372.8233337402344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 377.8779602050781
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 0 21.0 925.738299342 (10.553846649940214, 9)
loss 368.9033508300781
Current State,action,reward,Response time,Next State:  (9, 10.553846649940214) 3 20.0 975.559328891 (10.489125480251131, 10)
loss 376.9570617675781
Current State,action,reward,Response time,Next State:  (10, 10.489125480251131) 3 19.0 934.046546974 (10.448897752470936, 11)
loss 372.4902038574219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 372.31170654296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 373.26953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 377.7084655761719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 368.8380126953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 371.09857177734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 370.3797302246094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 373.047119140625
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 2 19.0 913.381595845 (10.30224719189987, 11)
loss 370.9967041015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 376.17230224609375
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 1 20.0 911.223233653 (10.268274366284802, 10)
loss 372.4843444824219
Current State,action,reward,Response time,Next State:  (10, 10.268274366284802) 3 19.0 922.331700166 (10.335411397720526, 11)
loss 368.8020324707031
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 2 19.0 914.247384359 (10.305649118067803, 11)
loss 369.82568359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 372.35638427734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 370.73291015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 369.2536926269531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 369.8399353027344
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 4 19.0 909.008683798 (10.369891240151098, 11)
loss 371.0958557128906
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 1 20.0 916.069372847 (10.316955310454549, 10)
loss 375.14642333984375
Current State,action,reward,Response time,Next State:  (10, 10.316955310454549) 3 19.0 924.913936648 (10.333617326102203, 11)
loss 370.6044006347656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 370.09832763671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 371.8428039550781
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 1 20.0 919.032945938 (10.546025383098053, 10)
loss 375.215087890625
Current State,action,reward,Response time,Next State:  (10, 10.546025383098053) 3 19.0 937.064750655 (10.655373370049301, 11)
loss 370.114013671875
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 0 21.0 931.154858096 (10.624473674922116, 9)
loss 375.6822509765625
Current State,action,reward,Response time,Next State:  (9, 10.624473674922116) 3 20.0 979.256305105 (10.771376986314287, 10)
loss 375.1761779785156
Current State,action,reward,Response time,Next State:  (10, 10.771376986314287) 2 20.0 949.018321829 (10.924797168745895, 10)
loss 378.69622802734375
Current State,action,reward,Response time,Next State:  (10, 10.924797168745895) 3 19.0 957.1563561 (11.039747673816453, 11)
loss 372.5863952636719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 368.8074645996094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 368.9728088378906
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 0 21.0 984.787563682 (11.819721938468785, 9)
loss 370.31134033203125
Current State,action,reward,Response time,Next State:  (9, 11.819721938468785) 3 20.0 1041.82165315 (12.19918626616789, 10)
loss 373.2239990234375
Current State,action,reward,Response time,Next State:  (10, 12.19918626616789) 3 19.0 1024.75516863 (12.501496275411796, 11)
loss 374.56842041015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 374.13372802734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 372.1995849609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 369.4168395996094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 374.5439147949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 371.33795166015625
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 1 20.0 1179.43847566 (15.836943704090487, 10)
loss 377.8763427734375
Current State,action,reward,Response time,Next State:  (10, 15.836943704090487) 3 19.0 1217.71670884 (16.466876895473597, 11)
loss 372.3970031738281
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 0 21.0 1238.24711194 (16.871606159345866, 9)
loss 368.98565673828125
Current State,action,reward,Response time,Next State:  (9, 16.871606159345866) 3 20.0 1306.26286107 (17.534967586021782, 10)
loss 371.21087646484375
Current State,action,reward,Response time,Next State:  (10, 17.534967586021782) 3 19.0 1307.78684385 (17.669285735563751, 11)
loss 369.63861083984375
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 2 19.0 1301.78496219 (17.944480812078613, 11)
loss 367.8578796386719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 371.4293518066406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 369.2325439453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 371.9014892578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 370.47265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 372.6103820800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 376.7958984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 373.0284729003906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 367.8380432128906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 373.3734436035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 373.5004577636719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 378.3199157714844
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 1 20.0 1376.52055872 (18.668181536495972, 10)
loss 372.0430603027344
Current State,action,reward,Response time,Next State:  (10, 18.668181536495972) 3 19.0 1367.89714889 (18.375894992990247, 11)
loss 375.7584533691406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 370.91424560546875
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 2 19.0 1310.13203606 (17.229782241685768, 11)
loss 377.1106872558594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 371.43450927734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 377.58160400390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 373.1707763671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 375.51007080078125
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 4 19.0 1204.52470225 (15.550833128512703, 11)
loss 371.5849609375
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 0 21.0 1189.84140354 (15.446694946204717, 9)
loss 369.4877624511719
Current State,action,reward,Response time,Next State:  (9, 15.446694946204717) 3 20.0 1231.67579099 (15.750501603468638, 10)
loss 373.6554260253906
Current State,action,reward,Response time,Next State:  (10, 15.750501603468638) 3 19.0 1213.1314661 (15.817158911312735, 11)
loss 371.78253173828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 368.12371826171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 369.9748229980469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 368.14892578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 371.7745056152344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 373.668701171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 371.39508056640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 372.19268798828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 368.4254455566406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 370.5650329589844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 370.6179504394531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 368.8294982910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 370.794189453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 370.8046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 367.7737121582031
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 4 19.0 1269.21706044 (17.215992726625572, 11)
loss 372.6478576660156
############ Running episode number: 121  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 372.51495361328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 372.4599914550781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 367.38702392578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 370.0836181640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 375.7730712890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 369.6638488769531
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 0 21.0 962.898956888 (11.027107764209074, 9)
loss 369.8904724121094
Current State,action,reward,Response time,Next State:  (9, 11.027107764209074) 3 20.0 1000.33221268 (10.995673623987257, 10)
loss 370.57427978515625
Current State,action,reward,Response time,Next State:  (10, 10.995673623987257) 3 19.0 960.915933313 (10.931193889570471, 11)
loss 371.30059814453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 365.43408203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 369.7015075683594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 367.7052001953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 372.3320617675781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 373.3915100097656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 369.2185363769531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 367.56939697265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 370.8468017578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 371.248779296875
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 4 19.0 922.369964659 (10.448897752470936, 11)
loss 371.09552001953125
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 2 19.0 920.244245637 (10.433149880183072, 11)
loss 369.91845703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 370.9998474121094
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 2 19.0 919.871906942 (10.370942817486826, 11)
loss 370.43505859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 366.9082946777344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 369.10009765625
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 4 19.0 917.051082408 (10.344006106602812, 11)
loss 366.8984680175781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 369.9978332519531
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 1 20.0 913.381595845 (10.30224719189987, 10)
loss 371.29345703125
Current State,action,reward,Response time,Next State:  (10, 10.30224719189987) 3 19.0 924.133757854 (10.278181486298042, 11)
loss 370.2115478515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 373.1956481933594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 375.8643493652344
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 1 20.0 914.247384359 (10.305649118067803, 10)
loss 374.5363464355469
Current State,action,reward,Response time,Next State:  (10, 10.305649118067803) 3 19.0 924.314209939 (10.24826025489064, 11)
loss 374.7193603515625
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 0 21.0 909.642131904 (10.276491935146446, 9)
loss 371.8495788574219
Current State,action,reward,Response time,Next State:  (9, 10.276491935146446) 3 20.0 961.041178317 (10.236991269871366, 10)
loss 373.7782897949219
Current State,action,reward,Response time,Next State:  (10, 10.236991269871366) 3 19.0 920.672316722 (10.236272697871373, 11)
loss 373.078125
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 1 20.0 909.008683798 (10.369891240151098, 10)
loss 376.7181701660156
Current State,action,reward,Response time,Next State:  (10, 10.369891240151098) 3 19.0 927.721874973 (10.316955310454549, 11)
loss 369.1624755859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 368.9411926269531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 368.58843994140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 369.10565185546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 370.9548034667969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 366.2937927246094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 364.05560302734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 375.9271545410156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 376.3509826660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 371.2629089355469
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 1 20.0 951.466016946 (11.271571944085663, 10)
loss 369.12530517578125
Current State,action,reward,Response time,Next State:  (10, 11.271571944085663) 3 19.0 975.550709187 (11.670334358779868, 11)
loss 375.7035827636719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 371.72430419921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 370.2991027832031
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 1 20.0 1012.73322757 (12.501496275411796, 10)
loss 373.3825988769531
Current State,action,reward,Response time,Next State:  (10, 12.501496275411796) 3 19.0 1040.79092857 (13.168618569876575, 11)
loss 371.08935546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 372.88458251953125
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 2 19.0 1089.37925646 (14.283719188889453, 11)
loss 371.0422058105469
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 4 19.0 1122.88439768 (14.677479537099185, 11)
loss 370.85003662109375
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 2 19.0 1143.69153516 (15.353965082180355, 11)
loss 373.6368713378906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 372.8769226074219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 373.50054931640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 373.6707763671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 366.5431823730469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 365.7403869628906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 370.2607727050781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 371.158935546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 370.41961669921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 369.1983337402344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 370.1946716308594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 370.0610656738281
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 4 19.0 1390.09363446 (19.213467265587269, 11)
loss 364.4354248046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 366.22125244140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 366.6248779296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 368.9352111816406
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 1 20.0 1383.93777195 (19.25591252280865, 10)
loss 367.5511779785156
Current State,action,reward,Response time,Next State:  (10, 19.25591252280865) 3 19.0 1399.0728054 (19.08360399753829, 11)
loss 368.56719970703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 369.3887023925781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 368.7364196777344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 371.8224182128906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 371.25885009765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 369.7904357910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 369.356201171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 370.99102783203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 371.322509765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 372.2386169433594
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 2 19.0 1189.84140354 (15.446694946204717, 11)
loss 370.94342041015625
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 2 19.0 1184.33851965 (15.750501603468638, 11)
loss 369.82183837890625
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 4 19.0 1200.39231205 (15.817158911312735, 11)
loss 370.76617431640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 370.3878173828125
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 1 20.0 1204.59090422 (15.892373986997768, 10)
loss 365.25408935546875
Current State,action,reward,Response time,Next State:  (10, 15.892373986997768) 3 19.0 1220.65695786 (15.954793861767499, 11)
loss 371.1429748535156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 367.32232666015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 372.4326171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 368.66925048828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 366.9082336425781
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 1 20.0 1219.63501821 (16.147078378791146, 10)
loss 373.8935852050781
Current State,action,reward,Response time,Next State:  (10, 16.147078378791146) 3 19.0 1234.16752106 (16.229253414601111, 11)
loss 372.9815368652344
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 2 19.0 1225.69057988 (16.295120821876548, 11)
loss 370.38214111328125
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 0 21.0 1229.17115431 (16.667936385136993, 9)
loss 372.43231201171875
Current State,action,reward,Response time,Next State:  (9, 16.667936385136993) 3 20.0 1295.6017535 (16.836383524612351, 10)
loss 373.67010498046875
Current State,action,reward,Response time,Next State:  (10, 16.836383524612351) 3 19.0 1270.73108663 (16.845818065953559, 11)
loss 373.74945068359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 368.075439453125
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 2 19.0 1269.21706044 (17.215992726625572, 11)
loss 374.0848083496094
############ Running episode number: 122  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 368.45770263671875
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 1 20.0 1000.80824137 (11.786394321941378, 10)
loss 368.431396484375
Current State,action,reward,Response time,Next State:  (10, 11.786394321941378) 3 19.0 1002.85899476 (11.61852219546234, 11)
loss 369.74774169921875
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 1 20.0 982.049698353 (11.469111876584304, 10)
loss 375.56982421875
Current State,action,reward,Response time,Next State:  (10, 11.469111876584304) 3 19.0 986.02903554 (11.336751742492702, 11)
loss 367.3589172363281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 368.1741943359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 365.9065856933594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 370.785400390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 371.7090759277344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 371.6250915527344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 368.3804016113281
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 2 19.0 939.812260006 (10.768325938188134, 11)
loss 367.2153015136719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 377.98223876953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 372.0614318847656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 365.3926086425781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 373.5635070800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 371.2690124511719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 371.4711608886719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 370.0494079589844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 376.9793395996094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 372.0171203613281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 372.20367431640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 367.7966613769531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 369.59283447265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 371.2728576660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 367.9279479980469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 366.8546142578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 368.97882080078125
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 4 19.0 911.223233653 (10.268274366284802, 11)
loss 366.0190734863281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 367.3818054199219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 366.0057373046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 372.8565368652344
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 2 19.0 909.642131904 (10.276491935146446, 11)
loss 370.114013671875
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 4 19.0 911.133954163 (10.236991269871366, 11)
loss 375.0411376953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 371.9389953613281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 372.1331787109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 368.8217468261719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 367.7039794921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 374.46099853515625
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 0 21.0 917.140709305 (10.425974763084863, 9)
loss 373.69647216796875
Current State,action,reward,Response time,Next State:  (9, 10.425974763084863) 3 20.0 968.865866662 (10.546025383098053, 10)
loss 372.2525634765625
Current State,action,reward,Response time,Next State:  (10, 10.546025383098053) 3 19.0 937.064750655 (10.655373370049301, 11)
loss 366.0491638183594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 369.0165100097656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 368.3873596191406
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 4 19.0 937.284736847 (10.924797168745895, 11)
loss 372.1981506347656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 372.8819274902344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 369.3975524902344
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 1 20.0 963.716106332 (11.670334358779868, 10)
loss 369.0011901855469
Current State,action,reward,Response time,Next State:  (10, 11.670334358779868) 3 19.0 996.702699398 (11.819721938468785, 11)
loss 373.8827209472656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 369.9267578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 366.6190185546875
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 4 19.0 1028.70793389 (13.168618569876575, 11)
loss 365.53045654296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 369.892333984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 373.1157531738281
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 2 19.0 1122.88439768 (14.677479537099185, 11)
loss 367.451171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 374.1220703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 368.144287109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 368.0940246582031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 367.9873962402344
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 0 21.0 1259.63387034 (17.534967586021782, 9)
loss 371.22967529296875
Current State,action,reward,Response time,Next State:  (9, 17.534967586021782) 3 20.0 1340.98655806 (17.669285735563751, 10)
loss 369.4947204589844
Current State,action,reward,Response time,Next State:  (10, 17.669285735563751) 3 19.0 1314.91162813 (17.944480812078613, 11)
loss 365.8847961425781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 369.5672912597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 370.6807556152344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 366.6819152832031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 365.1258239746094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 366.4377136230469
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 4 19.0 1390.09363446 (19.213467265587269, 11)
loss 365.94281005859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 363.5242614746094
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 1 20.0 1379.54110953 (19.385636054792762, 10)
loss 370.0216979980469
Current State,action,reward,Response time,Next State:  (10, 19.385636054792762) 3 19.0 1405.95387237 (19.223969507401588, 11)
loss 369.7549133300781
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 4 19.0 1383.93777195 (19.25591252280865, 11)
loss 374.13897705078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 367.3330383300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 370.1556091308594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 370.2938232421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 366.3690185546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 365.7535705566406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 366.9896240234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 367.30633544921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 367.6343994140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 364.36114501953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 365.31402587890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 368.2374572753906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 366.0693359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 368.3740539550781
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 2 19.0 1203.91462651 (15.829956988360925, 11)
loss 369.5753479003906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 368.871826171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 370.40576171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 371.4292907714844
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 0 21.0 1213.81868812 (16.017694914042416, 9)
loss 365.73992919921875
Current State,action,reward,Response time,Next State:  (9, 16.017694914042416) 3 20.0 1261.56482143 (15.947547279389703, 10)
loss 365.4742431640625
Current State,action,reward,Response time,Next State:  (10, 15.947547279389703) 3 19.0 1223.58357506 (16.11465619633363, 11)
loss 361.97235107421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 367.9375305175781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 368.30975341796875
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 0 21.0 1225.69057988 (16.295120821876548, 9)
loss 370.86480712890625
Current State,action,reward,Response time,Next State:  (9, 16.295120821876548) 3 20.0 1276.0866986 (16.667936385136993, 10)
loss 369.5259704589844
Current State,action,reward,Response time,Next State:  (10, 16.667936385136993) 3 19.0 1261.79596106 (16.836383524612351, 11)
loss 374.72271728515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 369.56243896484375
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 0 21.0 1258.27117243 (17.052961248403161, 9)
loss 371.1015319824219
Current State,action,reward,Response time,Next State:  (9, 17.052961248403161) 3 20.0 1315.75590499 (17.215992726625572, 10)
loss 366.0286560058594
############ Running episode number: 123  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 371.4256896972656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 369.1345520019531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 368.2561950683594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 369.595947265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 369.9671630859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 368.6839904785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 367.30047607421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 369.4169921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 370.6757507324219
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 0 21.0 945.729803224 (10.816918347608043, 9)
loss 365.9267883300781
Current State,action,reward,Response time,Next State:  (9, 10.816918347608043) 3 20.0 989.329834005 (10.819208572963639, 10)
loss 368.751220703125
Action +2 not possible so Scaled up by 1
Current State,action,reward,Response time,Next State:  (10, 10.819208572963639) 4 19.0 951.555504911 (10.768325938188134, 11)
loss 366.10675048828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 366.91290283203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 373.4337463378906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 369.4608154296875
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 1 20.0 927.560809977 (10.552868829802469, 10)
loss 373.4441833496094
Current State,action,reward,Response time,Next State:  (10, 10.552868829802469) 3 19.0 937.427755072 (10.553846649940214, 11)
loss 366.8672790527344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 364.8602600097656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 370.0037841796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 367.45135498046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 367.728271484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 369.3835754394531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 371.39874267578125
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 1 20.0 919.104778396 (10.388469398680568, 10)
loss 371.1253356933594
Current State,action,reward,Response time,Next State:  (10, 10.388469398680568) 3 19.0 928.707336523 (10.344006106602812, 11)
loss 367.3815612792969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 368.8455505371094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 366.4085998535156
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 4 19.0 912.494916918 (10.278181486298042, 11)
loss 367.083984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 369.4450988769531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 366.70068359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 365.8743896484375
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 4 19.0 912.67468196 (10.24826025489064, 11)
loss 365.2138366699219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 368.552978515625
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 0 21.0 911.133954163 (10.236991269871366, 9)
loss 371.9884338378906
Current State,action,reward,Response time,Next State:  (9, 10.236991269871366) 2 21.0 958.973513426 (10.236272697871373, 9)
loss 366.6827087402344
Current State,action,reward,Response time,Next State:  (9, 10.236272697871373) 3 20.0 958.935899728 (10.369891240151098, 10)
loss 368.97174072265625
Current State,action,reward,Response time,Next State:  (10, 10.369891240151098) 3 19.0 927.721874973 (10.316955310454549, 11)
loss 366.9337463378906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 363.6563720703125
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 2 19.0 914.152581784 (10.390165524255663, 11)
loss 369.57928466796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 368.8446960449219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 364.0762634277344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 367.2603759765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 369.5859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 366.50421142578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 366.0536193847656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 368.1583251953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 370.127197265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 370.1513977050781
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 0 21.0 984.787563682 (11.819721938468785, 9)
loss 371.9144592285156
Current State,action,reward,Response time,Next State:  (9, 11.819721938468785) 3 20.0 1041.82165315 (12.19918626616789, 10)
loss 368.47528076171875
Current State,action,reward,Response time,Next State:  (10, 12.19918626616789) 3 19.0 1024.75516863 (12.501496275411796, 11)
loss 368.7298583984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 369.472412109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 365.4244689941406
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 0 21.0 1089.37925646 (14.283719188889453, 9)
loss 368.7334289550781
Current State,action,reward,Response time,Next State:  (9, 14.283719188889453) 3 20.0 1170.79974938 (14.677479537099185, 10)
loss 370.04998779296875
Current State,action,reward,Response time,Next State:  (10, 14.677479537099185) 3 19.0 1156.21398489 (15.353965082180355, 11)
loss 369.8495178222656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 367.1870422363281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 370.4233703613281
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 1 20.0 1238.24711194 (16.871606159345866, 10)
loss 370.51605224609375
Current State,action,reward,Response time,Next State:  (10, 16.871606159345866) 3 19.0 1272.5994393 (17.534967586021782, 11)
loss 368.2069091796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 366.14129638671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 368.3966979980469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 370.1016540527344
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 1 20.0 1339.64749699 (18.671267839956315, 10)
loss 367.7174072265625
Current State,action,reward,Response time,Next State:  (10, 18.671267839956315) 3 19.0 1368.06085906 (19.02839494033929, 11)
loss 365.8570556640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 368.9670715332031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 367.0158386230469
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 0 21.0 1390.09363446 (19.213467265587269, 9)
loss 367.7995910644531
Current State,action,reward,Response time,Next State:  (9, 19.213467265587269) 3 20.0 1428.84773289 (19.140765783401285, 10)
loss 368.39141845703125
Current State,action,reward,Response time,Next State:  (10, 19.140765783401285) 3 19.0 1392.96495117 (19.385636054792762, 11)
loss 368.991455078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 372.5190124511719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 373.8912048339844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 370.3163146972656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 369.8235168457031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 368.1894226074219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 365.4383544921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 366.067626953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 370.8447570800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 367.9148254394531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 369.85406494140625
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 2 19.0 1210.97093797 (15.828704162850809, 11)
loss 368.7779846191406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 369.650634765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 368.26092529296875
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 2 19.0 1184.33851965 (15.750501603468638, 11)
loss 369.4731140136719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 366.1048583984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 366.9906921386719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 369.6711120605469
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 0 21.0 1207.88915169 (15.954793861767499, 9)
loss 370.8083801269531
Current State,action,reward,Response time,Next State:  (9, 15.954793861767499) 3 20.0 1258.27226176 (16.004586266677634, 10)
loss 365.3877868652344
Current State,action,reward,Response time,Next State:  (10, 16.004586266677634) 1 21.0 1226.60915635 (16.017694914042416, 9)
loss 366.78887939453125
Current State,action,reward,Response time,Next State:  (9, 16.017694914042416) 3 20.0 1261.56482143 (15.947547279389703, 10)
loss 367.6861267089844
Current State,action,reward,Response time,Next State:  (10, 15.947547279389703) 3 19.0 1223.58357506 (16.11465619633363, 11)
loss 373.95977783203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 363.8728332519531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 367.5731201171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 368.94573974609375
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 2 19.0 1229.17115431 (16.667936385136993, 11)
loss 368.6604919433594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 371.75885009765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 362.5727233886719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 363.1130065917969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 370.2489318847656
############ Running episode number: 124  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 1 21.0 1029.06659875 (11.973514343585284, 9)
loss 368.6356506347656
Current State,action,reward,Response time,Next State:  (9, 11.973514343585284) 3 20.0 1049.87192659 (11.786394321941378, 10)
loss 371.7593688964844
Current State,action,reward,Response time,Next State:  (10, 11.786394321941378) 3 19.0 1002.85899476 (11.61852219546234, 11)
loss 367.47271728515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 368.0351867675781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 363.2575378417969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 368.2060241699219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 366.281005859375
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 4 19.0 950.798097136 (10.995673623987257, 11)
loss 368.8285217285156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 367.94012451171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 365.30389404296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 368.4728698730469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 365.55780029296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 369.33013916015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 370.9145812988281
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 4 19.0 930.602776506 (10.58735855349979, 11)
loss 365.5830078125
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 1 20.0 927.560809977 (10.552868829802469, 10)
loss 367.4555969238281
Current State,action,reward,Response time,Next State:  (10, 10.552868829802469) 3 19.0 937.427755072 (10.553846649940214, 11)
loss 364.33624267578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 370.8319396972656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 368.094482421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 364.8500671386719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 372.3274230957031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 365.97998046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 366.4555358886719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 365.99761962890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 372.17120361328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 372.2091064453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 374.4026184082031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 366.48126220703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 367.9136962890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 364.0646667480469
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 2 19.0 914.247384359 (10.305649118067803, 11)
loss 369.68914794921875
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 1 20.0 912.67468196 (10.24826025489064, 10)
loss 366.9483337402344
Current State,action,reward,Response time,Next State:  (10, 10.24826025489064) 3 19.0 921.2700698 (10.276491935146446, 11)
loss 367.38787841796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 370.15338134765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 366.58599853515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 368.39422607421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 370.3037109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 364.41424560546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 367.3818664550781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 367.4970397949219
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 1 20.0 919.032945938 (10.546025383098053, 10)
loss 366.61187744140625
Current State,action,reward,Response time,Next State:  (10, 10.546025383098053) 3 19.0 937.064750655 (10.655373370049301, 11)
loss 369.1831970214844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 367.6501770019531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 368.0025329589844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 369.1167297363281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 363.69940185546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 367.4304504394531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 363.6663513183594
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 1 20.0 984.787563682 (11.819721938468785, 10)
loss 363.55926513671875
Current State,action,reward,Response time,Next State:  (10, 11.819721938468785) 1 21.0 1004.62682792 (12.19918626616789, 9)
loss 363.3736267089844
Current State,action,reward,Response time,Next State:  (9, 12.19918626616789) 3 20.0 1061.68473805 (12.501496275411796, 10)
loss 371.01104736328125
Current State,action,reward,Response time,Next State:  (10, 12.501496275411796) 3 19.0 1040.79092857 (13.168618569876575, 11)
loss 369.2784729003906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 362.49078369140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 367.3738708496094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 366.250732421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 366.2303466796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 366.38592529296875
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 1 20.0 1204.9600972 (16.466876895473597, 10)
loss 368.9755554199219
Current State,action,reward,Response time,Next State:  (10, 16.466876895473597) 3 19.0 1251.130943 (16.871606159345866, 11)
loss 366.1121520996094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 369.5748596191406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 367.3399963378906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 366.6728210449219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 364.9447326660156
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 4 19.0 1339.64749699 (18.671267839956315, 11)
loss 370.890380859375
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 1 20.0 1354.73183582 (19.02839494033929, 10)
loss 366.4060363769531
Current State,action,reward,Response time,Next State:  (10, 19.02839494033929) 1 21.0 1387.00434183 (19.286321916040979, 9)
loss 365.58465576171875
Current State,action,reward,Response time,Next State:  (9, 19.286321916040979) 3 20.0 1432.66131431 (19.340464848017284, 10)
loss 366.8345031738281
Current State,action,reward,Response time,Next State:  (10, 19.340464848017284) 3 19.0 1403.55780672 (19.213467265587269, 11)
loss 369.8268737792969
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 4 19.0 1383.38281107 (19.140765783401285, 11)
loss 363.6656494140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 369.50750732421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 367.8350830078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 364.720703125
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 1 20.0 1385.62570908 (19.08360399753829, 10)
loss 368.50018310546875
Current State,action,reward,Response time,Next State:  (10, 19.08360399753829) 3 19.0 1389.93285614 (18.668181536495972, 11)
loss 368.1985778808594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 367.53948974609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 367.2177429199219
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 4 19.0 1310.13203606 (17.229782241685768, 11)
loss 368.29541015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 364.1640319824219
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 4 19.0 1258.07554888 (16.237094554670044, 11)
loss 365.2893371582031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 368.42291259765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 369.3766174316406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 369.1480407714844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 369.0362854003906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 367.25262451171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 365.78466796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 369.4546813964844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 367.3271789550781
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 4 19.0 1207.88915169 (15.954793861767499, 11)
loss 367.2141418457031
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 2 19.0 1211.18755114 (16.004586266677634, 11)
loss 364.56402587890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 361.7372741699219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 366.66180419921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 372.0489196777344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 365.9591979980469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 364.5519104003906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 365.1212158203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 364.85516357421875
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 0 21.0 1248.87152463 (16.836383524612351, 9)
loss 366.35247802734375
Current State,action,reward,Response time,Next State:  (9, 16.836383524612351) 3 20.0 1304.41912996 (16.845818065953559, 10)
loss 368.82708740234375
Current State,action,reward,Response time,Next State:  (10, 16.845818065953559) 3 19.0 1271.23153331 (17.052961248403161, 11)
loss 369.486328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 369.5829772949219
############ Running episode number: 125  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 1 21.0 1029.06659875 (11.973514343585284, 9)
loss 367.4542541503906
Current State,action,reward,Response time,Next State:  (9, 11.973514343585284) 3 20.0 1049.87192659 (11.786394321941378, 10)
loss 369.031982421875
Current State,action,reward,Response time,Next State:  (10, 11.786394321941378) 3 19.0 1002.85899476 (11.61852219546234, 11)
loss 367.563232421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 364.65655517578125
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 4 19.0 974.154538113 (11.336751742492702, 11)
loss 369.9715881347656
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 0 21.0 967.160346038 (11.25610796929319, 9)
loss 371.4255065917969
Current State,action,reward,Response time,Next State:  (9, 11.25610796929319) 3 20.0 1012.3192433 (11.027107764209074, 10)
loss 364.6924133300781
Current State,action,reward,Response time,Next State:  (10, 11.027107764209074) 3 19.0 962.583328739 (10.995673623987257, 11)
loss 366.9340515136719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 367.9892272949219
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 0 21.0 945.729803224 (10.816918347608043, 9)
loss 364.29974365234375
Current State,action,reward,Response time,Next State:  (9, 10.816918347608043) 3 20.0 989.329834005 (10.819208572963639, 10)
loss 371.6495361328125
Current State,action,reward,Response time,Next State:  (10, 10.819208572963639) 3 19.0 951.555504911 (10.768325938188134, 11)
loss 369.37249755859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 365.7823486328125
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 4 19.0 937.318160694 (10.644925616761762, 11)
loss 368.41162109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 363.1690979003906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 366.45977783203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 369.9648742675781
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 1 20.0 925.789969445 (10.489125480251131, 10)
loss 367.0500183105469
Current State,action,reward,Response time,Next State:  (10, 10.489125480251131) 3 19.0 934.046546974 (10.448897752470936, 11)
loss 366.1777648925781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 365.59869384765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 365.4609069824219
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 1 20.0 919.871906942 (10.370942817486826, 10)
loss 367.0498962402344
Current State,action,reward,Response time,Next State:  (10, 10.370942817486826) 3 19.0 927.777654938 (10.42733414151318, 11)
loss 368.9059753417969
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 0 21.0 919.104778396 (10.388469398680568, 9)
loss 362.2385559082031
Current State,action,reward,Response time,Next State:  (9, 10.388469398680568) 1 22.0 966.902645924 (10.344006106602812, 8)
loss 367.2997741699219
Current State,action,reward,Response time,Next State:  (8, 10.344006106602812) 3 21.0 975.596796379 (10.319026962956018, 9)
loss 366.10699462890625
Current State,action,reward,Response time,Next State:  (9, 10.319026962956018) 2 21.0 963.267677113 (10.30224719189987, 9)
loss 366.8671569824219
Current State,action,reward,Response time,Next State:  (9, 10.30224719189987) 3 20.0 962.389338906 (10.278181486298042, 10)
loss 367.5609130859375
Current State,action,reward,Response time,Next State:  (10, 10.278181486298042) 3 19.0 922.857214352 (10.268274366284802, 11)
loss 366.1100158691406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 367.0417175292969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 364.1081237792969
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 0 21.0 912.67468196 (10.24826025489064, 9)
loss 374.5522155761719
Current State,action,reward,Response time,Next State:  (9, 10.24826025489064) 3 20.0 959.563389179 (10.276491935146446, 10)
loss 370.7564392089844
Current State,action,reward,Response time,Next State:  (10, 10.276491935146446) 3 19.0 922.767593645 (10.236991269871366, 11)
loss 371.74200439453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 370.40325927734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 367.57220458984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 366.4197998046875
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 2 19.0 913.272125304 (10.333617326102203, 11)
loss 367.0836181640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 367.83319091796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 365.77239990234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 368.6033935546875
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 2 19.0 925.376677007 (10.655373370049301, 11)
loss 365.5422058105469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 365.05963134765625
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 2 19.0 929.522052234 (10.771376986314287, 11)
loss 364.5906982421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 367.63897705078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 369.6730041503906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 368.5954284667969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 367.505859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 366.1514892578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 367.3174133300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 366.69500732421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 364.46380615234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 368.5203552246094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 367.24871826171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 363.7223815917969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 369.7104187011719
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 4 19.0 1179.43847566 (15.836943704090487, 11)
loss 369.412353515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 363.1150207519531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 366.34942626953125
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 4 19.0 1259.63387034 (17.534967586021782, 11)
loss 368.8713073730469
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 0 21.0 1294.6873044 (17.669285735563751, 9)
loss 366.2415771484375
Current State,action,reward,Response time,Next State:  (9, 17.669285735563751) 0 23.0 1348.01745033 (17.944480812078613, 7)
loss 370.1136474609375
Current State,action,reward,Response time,Next State:  (7, 17.944480812078613) 0 25.0 1494.0705337 (18.385807405229915, 5)
loss 367.35296630859375
Current State,action,reward,Response time,Next State:  (5, 18.385807405229915) 3 24.0 1946.89361927 (18.671267839956315, 6)
loss 371.53448486328125
Current State,action,reward,Response time,Next State:  (6, 18.671267839956315) 2 24.0 1632.29983282 (19.02839494033929, 6)
loss 365.8257751464844
Current State,action,reward,Response time,Next State:  (6, 19.02839494033929) 3 23.0 1655.91989997 (19.286321916040979, 7)
loss 366.8762512207031
Current State,action,reward,Response time,Next State:  (7, 19.286321916040979) 3 22.0 1577.68968482 (19.340464848017284, 8)
loss 366.1233215332031
Current State,action,reward,Response time,Next State:  (8, 19.340464848017284) 3 21.0 1501.39298325 (19.213467265587269, 9)
loss 366.1499938964844
Current State,action,reward,Response time,Next State:  (9, 19.213467265587269) 3 20.0 1428.84773289 (19.140765783401285, 10)
loss 369.4062805175781
Current State,action,reward,Response time,Next State:  (10, 19.140765783401285) 3 19.0 1392.96495117 (19.385636054792762, 11)
loss 367.76904296875
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 1 20.0 1392.48057747 (19.223969507401588, 10)
loss 368.134765625
Current State,action,reward,Response time,Next State:  (10, 19.223969507401588) 3 19.0 1397.37841716 (19.25591252280865, 11)
loss 367.242919921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 369.2119140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 365.39337158203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 367.8868408203125
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 1 20.0 1339.12370397 (17.82724819986867, 10)
loss 365.2576904296875
Current State,action,reward,Response time,Next State:  (10, 17.82724819986867) 3 19.0 1323.29060362 (17.229782241685768, 11)
loss 369.1983337402344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 372.590087890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 368.5146484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 369.2640686035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 369.53643798828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 369.64251708984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 367.5658264160156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 366.6727600097656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 366.9779968261719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 367.8098449707031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 372.6127014160156
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 1 20.0 1207.88915169 (15.954793861767499, 10)
loss 365.5138854980469
Current State,action,reward,Response time,Next State:  (10, 15.954793861767499) 3 19.0 1223.96796344 (16.004586266677634, 11)
loss 364.8068542480469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 365.2853088378906
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 0 21.0 1214.51137704 (15.947547279389703, 9)
loss 371.5145568847656
Current State,action,reward,Response time,Next State:  (9, 15.947547279389703) 3 20.0 1257.89293893 (16.11465619633363, 10)
loss 369.9836120605469
Current State,action,reward,Response time,Next State:  (10, 16.11465619633363) 3 19.0 1232.44771583 (16.147078378791146, 11)
loss 369.50970458984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 365.37188720703125
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 0 21.0 1225.69057988 (16.295120821876548, 9)
loss 369.1139221191406
Current State,action,reward,Response time,Next State:  (9, 16.295120821876548) 3 20.0 1276.0866986 (16.667936385136993, 10)
loss 366.3214111328125
Current State,action,reward,Response time,Next State:  (10, 16.667936385136993) 3 19.0 1261.79596106 (16.836383524612351, 11)
loss 365.6409606933594
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 1 20.0 1257.77263112 (16.845818065953559, 10)
loss 366.8127136230469
Action +2 not possible so Scaled up by 1
Current State,action,reward,Response time,Next State:  (10, 16.845818065953559) 4 19.0 1271.23153331 (17.052961248403161, 11)
loss 372.4578552246094
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 1 20.0 1269.21706044 (17.215992726625572, 10)
loss 368.7933654785156
############ Running episode number: 126  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 365.9961853027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 368.125244140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 368.3028259277344
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 4 19.0 982.049698353 (11.469111876584304, 11)
loss 369.0386657714844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 363.2635498046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 369.05810546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 364.3277893066406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 371.8714294433594
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 0 21.0 949.137050055 (10.931193889570471, 9)
loss 367.9224548339844
Current State,action,reward,Response time,Next State:  (9, 10.931193889570471) 3 20.0 995.311594677 (10.816918347608043, 10)
loss 365.857177734375
Current State,action,reward,Response time,Next State:  (10, 10.816918347608043) 3 19.0 951.434021987 (10.819208572963639, 11)
loss 369.2710876464844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 369.4268798828125
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 2 19.0 937.12351295 (10.772009508959538, 11)
loss 369.34576416015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 364.7582702636719
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 1 20.0 930.602776506 (10.58735855349979, 10)
loss 367.0547180175781
Current State,action,reward,Response time,Next State:  (10, 10.58735855349979) 1 21.0 939.257231149 (10.552868829802469, 9)
loss 366.6266784667969
Current State,action,reward,Response time,Next State:  (9, 10.552868829802469) 1 22.0 975.508144832 (10.553846649940214, 8)
loss 370.013427734375
Current State,action,reward,Response time,Next State:  (8, 10.553846649940214) 3 21.0 987.860883917 (10.489125480251131, 9)
loss 370.9414978027344
Current State,action,reward,Response time,Next State:  (9, 10.489125480251131) 1 22.0 972.171495057 (10.448897752470936, 8)
loss 370.83746337890625
Current State,action,reward,Response time,Next State:  (8, 10.448897752470936) 3 21.0 981.727167119 (10.433149880183072, 9)
loss 370.0836181640625
Current State,action,reward,Response time,Next State:  (9, 10.433149880183072) 3 20.0 969.241448633 (10.44185150623065, 10)
loss 366.9638977050781
Current State,action,reward,Response time,Next State:  (10, 10.44185150623065) 2 20.0 931.538941947 (10.370942817486826, 10)
loss 369.5211181640625
Current State,action,reward,Response time,Next State:  (10, 10.370942817486826) 3 19.0 927.777654938 (10.42733414151318, 11)
loss 364.2890319824219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 368.13128662109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 369.9251708984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 371.1587219238281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 372.6551513671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 370.6221923828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 371.26068115234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 363.9895324707031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 367.3018798828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 373.01080322265625
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 2 19.0 909.642131904 (10.276491935146446, 11)
loss 368.73809814453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 365.571533203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 376.7939758300781
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 1 20.0 909.008683798 (10.369891240151098, 10)
loss 367.67041015625
Current State,action,reward,Response time,Next State:  (10, 10.369891240151098) 3 19.0 927.721874973 (10.316955310454549, 11)
loss 366.9491882324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 367.87884521484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 372.46954345703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 367.6332702636719
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 2 19.0 919.032945938 (10.546025383098053, 11)
loss 368.54052734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 373.595458984375
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 4 19.0 931.154858096 (10.624473674922116, 11)
loss 376.0065002441406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 367.5011901855469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 368.7287902832031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 368.1584777832031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 369.63983154296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 363.55908203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 371.6746520996094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 376.9247131347656
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 2 19.0 1012.73322757 (12.501496275411796, 11)
loss 368.37176513671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 368.1083068847656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 366.01531982421875
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 4 19.0 1089.37925646 (14.283719188889453, 11)
loss 365.8169250488281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 373.69219970703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 365.11029052734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 369.36474609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 369.6605529785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 365.8555908203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 369.0531921386719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 373.22381591796875
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 1 20.0 1301.78496219 (17.944480812078613, 10)
loss 371.42291259765625
Current State,action,reward,Response time,Next State:  (10, 17.944480812078613) 3 19.0 1329.50910109 (18.385807405229915, 11)
loss 367.1265563964844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 368.27239990234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 373.65240478515625
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 4 19.0 1373.60319427 (19.286321916040979, 11)
loss 366.36663818359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 370.45355224609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 362.48297119140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 370.65509033203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 369.2162780761719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 365.13043212890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 368.945556640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 363.88385009765625
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 0 21.0 1376.52055872 (18.668181536495972, 9)
loss 369.91998291015625
Current State,action,reward,Response time,Next State:  (9, 18.668181536495972) 2 21.0 1400.30471596 (18.375894992990247, 9)
loss 369.77862548828125
Current State,action,reward,Response time,Next State:  (9, 18.375894992990247) 0 23.0 1385.00495784 (17.82724819986867, 7)
loss 365.1650390625
Current State,action,reward,Response time,Next State:  (7, 17.82724819986867) 3 22.0 1486.76498054 (17.229782241685768, 8)
loss 374.35711669921875
Current State,action,reward,Response time,Next State:  (8, 17.229782241685768) 4 20.0 1378.03457101 (16.84211602880065, 10)
loss 370.68841552734375
Current State,action,reward,Response time,Next State:  (10, 16.84211602880065) 3 19.0 1271.03516211 (16.237094554670044, 11)
loss 364.6158752441406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 368.3283996582031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 368.2040100097656
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 4 19.0 1204.52470225 (15.550833128512703, 11)
loss 368.70928955078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 370.2982482910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 371.0162353515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 367.9889221191406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 365.79949951171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 368.8164367675781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 370.25262451171875
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 4 19.0 1211.18755114 (16.004586266677634, 11)
loss 369.4776916503906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 371.1598815917969
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 2 19.0 1214.51137704 (15.947547279389703, 11)
loss 369.8999938964844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 372.93218994140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 370.57611083984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 367.6659240722656
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 4 19.0 1225.69057988 (16.295120821876548, 11)
loss 369.9501647949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 371.5093078613281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 372.2278137207031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 367.8083190917969
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 2 19.0 1258.27117243 (17.052961248403161, 11)
loss 369.8194885253906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 366.8897399902344
############ Running episode number: 127  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 370.6351318359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 374.6689758300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 369.4744567871094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 368.1872863769531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 371.81488037109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 369.0579528808594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 372.1983947753906
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 1 20.0 950.798097136 (10.995673623987257, 10)
loss 371.3458251953125
Current State,action,reward,Response time,Next State:  (10, 10.995673623987257) 3 19.0 960.915933313 (10.931193889570471, 11)
loss 368.8773193359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 374.0110778808594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 367.2985534667969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 366.7058410644531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 366.8758544921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 372.2285461425781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 369.6661682128906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 368.737548828125
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 4 19.0 925.738299342 (10.553846649940214, 11)
loss 365.1433410644531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 372.34783935546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 371.1236877441406
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 2 19.0 920.244245637 (10.433149880183072, 11)
loss 368.1257019042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 370.8778991699219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 371.1347351074219
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 0 21.0 916.124940439 (10.42733414151318, 9)
loss 371.692626953125
Current State,action,reward,Response time,Next State:  (9, 10.42733414151318) 3 20.0 968.937023414 (10.388469398680568, 10)
loss 367.231689453125
Current State,action,reward,Response time,Next State:  (10, 10.388469398680568) 3 19.0 928.707336523 (10.344006106602812, 11)
loss 368.21160888671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 375.223876953125
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 2 19.0 913.381595845 (10.30224719189987, 11)
loss 370.31488037109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 372.622802734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 370.17633056640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 368.8866271972656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 369.7099304199219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 368.5871887207031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 371.6123962402344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 369.44140625
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 1 20.0 909.046654676 (10.236272697871373, 10)
loss 370.5551452636719
Current State,action,reward,Response time,Next State:  (10, 10.236272697871373) 3 19.0 920.634200723 (10.369891240151098, 11)
loss 368.1260070800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 372.4305114746094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 369.1553039550781
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 1 20.0 914.152581784 (10.390165524255663, 10)
loss 370.2368469238281
Current State,action,reward,Response time,Next State:  (10, 10.390165524255663) 3 19.0 928.797305964 (10.425974763084863, 11)
loss 366.945556640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 370.5030212402344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 366.7073974609375
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 0 21.0 931.154858096 (10.624473674922116, 9)
loss 368.1703796386719
Current State,action,reward,Response time,Next State:  (9, 10.624473674922116) 3 20.0 979.256305105 (10.771376986314287, 10)
loss 369.12677001953125
Current State,action,reward,Response time,Next State:  (10, 10.771376986314287) 3 19.0 949.018321829 (10.924797168745895, 11)
loss 367.8600769042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 368.6435852050781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 370.73529052734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 366.5978698730469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 371.3970947265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 369.5311279296875
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 2 19.0 1012.73322757 (12.501496275411796, 11)
loss 370.0994873046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 367.3840637207031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 367.7120666503906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 372.0833740234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 368.784912109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 368.83447265625
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 0 21.0 1179.43847566 (15.836943704090487, 9)
loss 366.68072509765625
Current State,action,reward,Response time,Next State:  (9, 15.836943704090487) 3 20.0 1252.10338759 (16.466876895473597, 10)
loss 368.20172119140625
Current State,action,reward,Response time,Next State:  (10, 16.466876895473597) 3 19.0 1251.130943 (16.871606159345866, 11)
loss 364.9345703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 369.7204284667969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 370.2274475097656
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 2 19.0 1301.78496219 (17.944480812078613, 11)
loss 369.8368835449219
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 2 19.0 1316.32685758 (18.385807405229915, 11)
loss 366.8843078613281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 369.6979675292969
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 2 19.0 1354.73183582 (19.02839494033929, 11)
loss 371.5088806152344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 368.96868896484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 372.78515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 369.3869934082031
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 2 19.0 1383.38281107 (19.140765783401285, 11)
loss 366.98876953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 369.33258056640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 368.4433288574219
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 4 19.0 1383.93777195 (19.25591252280865, 11)
loss 371.763916015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 369.1039123535156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 369.0556640625
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 2 19.0 1354.56874896 (18.375894992990247, 11)
loss 369.2338562011719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 368.4720153808594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 368.83343505859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 369.26129150390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 369.26995849609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 369.4415588378906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 373.2105712890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 364.141845703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 369.35223388671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 370.09576416015625
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 0 21.0 1200.39231205 (15.817158911312735, 9)
loss 366.3269348144531
Current State,action,reward,Response time,Next State:  (9, 15.817158911312735) 3 20.0 1251.06775133 (15.829956988360925, 10)
loss 372.904052734375
Current State,action,reward,Response time,Next State:  (10, 15.829956988360925) 0 22.0 1217.34610485 (15.892373986997768, 8)
loss 372.2761535644531
Current State,action,reward,Response time,Next State:  (8, 15.892373986997768) 3 21.0 1299.87001973 (15.954793861767499, 9)
loss 371.6816101074219
Current State,action,reward,Response time,Next State:  (9, 15.954793861767499) 3 20.0 1258.27226176 (16.004586266677634, 10)
loss 369.6852111816406
Current State,action,reward,Response time,Next State:  (10, 16.004586266677634) 3 19.0 1226.60915635 (16.017694914042416, 11)
loss 370.33721923828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 371.47406005859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 373.4776306152344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 373.5739440917969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 370.7366638183594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 365.9634094238281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 376.8302001953125
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 1 20.0 1248.87152463 (16.836383524612351, 10)
loss 372.9920654296875
Current State,action,reward,Response time,Next State:  (10, 16.836383524612351) 3 19.0 1270.73108663 (16.845818065953559, 11)
loss 369.5207214355469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 370.9011535644531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 371.1785888671875
############ Running episode number: 128  ##############
Action +2 not possible so Scaled up by 1
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 4 19.0 1029.06659875 (11.973514343585284, 11)
loss 374.5834655761719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 367.45098876953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 367.13226318359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 366.4714660644531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 365.86761474609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 370.81475830078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 367.1054382324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 367.4027099609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 367.93438720703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 370.043701171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 369.84149169921875
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 2 19.0 939.812260006 (10.768325938188134, 11)
loss 367.1227722167969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 367.25640869140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 364.2403869628906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 363.7459716796875
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 1 20.0 927.560809977 (10.552868829802469, 10)
loss 366.7517395019531
Current State,action,reward,Response time,Next State:  (10, 10.552868829802469) 3 19.0 937.427755072 (10.553846649940214, 11)
loss 367.2609558105469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 371.97064208984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 371.012451171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 373.32275390625
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 1 20.0 919.412094444 (10.44185150623065, 10)
loss 368.6732482910156
Current State,action,reward,Response time,Next State:  (10, 10.44185150623065) 3 19.0 931.538941947 (10.370942817486826, 11)
loss 367.8614501953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 374.97216796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 369.26959228515625
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 2 19.0 917.051082408 (10.344006106602812, 11)
loss 369.34783935546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 370.3774719238281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 369.7059020996094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 367.2304382324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 369.6058044433594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 368.23779296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 372.79803466796875
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 2 19.0 912.67468196 (10.24826025489064, 11)
loss 364.76666259765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 367.86968994140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 368.69439697265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 367.5087890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 364.4277648925781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 377.3498229980469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 370.687255859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 373.705810546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 367.3729248046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 369.728515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 366.3290710449219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 367.724609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 371.535888671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 365.3188171386719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 363.88958740234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 371.096923828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 370.59521484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 371.1297912597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 369.5740661621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 368.0389404296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 367.5830078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 370.1114807128906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 378.14813232421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 368.472900390625
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 2 19.0 1143.69153516 (15.353965082180355, 11)
loss 368.2637023925781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 365.7676086425781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 369.1740417480469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 369.05474853515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 366.916015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 369.4696960449219
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 0 21.0 1301.78496219 (17.944480812078613, 9)
loss 365.14093017578125
Current State,action,reward,Response time,Next State:  (9, 17.944480812078613) 3 20.0 1362.4225545 (18.385807405229915, 10)
loss 367.629150390625
Current State,action,reward,Response time,Next State:  (10, 18.385807405229915) 3 19.0 1352.9188695 (18.671267839956315, 11)
loss 368.981689453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 372.0022888183594
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 0 21.0 1373.60319427 (19.286321916040979, 9)
loss 368.95355224609375
Current State,action,reward,Response time,Next State:  (9, 19.286321916040979) 0 23.0 1432.66131431 (19.340464848017284, 7)
loss 368.375244140625
Current State,action,reward,Response time,Next State:  (7, 19.340464848017284) 3 22.0 1581.06369535 (19.213467265587269, 8)
loss 376.3034362792969
Current State,action,reward,Response time,Next State:  (8, 19.213467265587269) 3 21.0 1493.97063558 (19.140765783401285, 9)
loss 370.75384521484375
Current State,action,reward,Response time,Next State:  (9, 19.140765783401285) 3 20.0 1425.04216908 (19.385636054792762, 10)
loss 369.22662353515625
Current State,action,reward,Response time,Next State:  (10, 19.385636054792762) 3 19.0 1405.95387237 (19.223969507401588, 11)
loss 371.9093933105469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 368.73834228515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 368.83404541015625
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 1 20.0 1376.52055872 (18.668181536495972, 10)
loss 371.7063903808594
Current State,action,reward,Response time,Next State:  (10, 18.668181536495972) 3 19.0 1367.89714889 (18.375894992990247, 11)
loss 367.5445251464844
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 2 19.0 1339.12370397 (17.82724819986867, 11)
loss 369.3658142089844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 367.1639709472656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 366.16455078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 369.8757019042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 378.1446533203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 367.4485168457031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 372.55487060546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 368.7543029785156
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 0 21.0 1184.33851965 (15.750501603468638, 9)
loss 371.9460754394531
Current State,action,reward,Response time,Next State:  (9, 15.750501603468638) 3 20.0 1247.57857022 (15.817158911312735, 10)
loss 363.3277893066406
Action +2 not possible so Scaled up by 1
Current State,action,reward,Response time,Next State:  (10, 15.817158911312735) 4 19.0 1216.66724247 (15.829956988360925, 11)
loss 374.51361083984375
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 0 21.0 1204.59090422 (15.892373986997768, 9)
loss 367.1751403808594
Current State,action,reward,Response time,Next State:  (9, 15.892373986997768) 3 20.0 1255.00488935 (15.954793861767499, 10)
loss 372.5538635253906
Current State,action,reward,Response time,Next State:  (10, 15.954793861767499) 3 19.0 1223.96796344 (16.004586266677634, 11)
loss 367.947509765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 368.53741455078125
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 2 19.0 1214.51137704 (15.947547279389703, 11)
loss 372.8504638671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 367.81976318359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 370.41943359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 369.7049255371094
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 1 20.0 1225.69057988 (16.295120821876548, 10)
loss 367.9115905761719
Current State,action,reward,Response time,Next State:  (10, 16.295120821876548) 3 19.0 1242.02029803 (16.667936385136993, 11)
loss 367.8009338378906
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 1 20.0 1248.87152463 (16.836383524612351, 10)
loss 370.8027648925781
Current State,action,reward,Response time,Next State:  (10, 16.836383524612351) 3 19.0 1270.73108663 (16.845818065953559, 11)
loss 372.0057678222656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 369.8857421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 366.9684143066406
############ Running episode number: 129  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 367.2417297363281
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 2 19.0 1000.80824137 (11.786394321941378, 11)
loss 370.509521484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 371.4241027832031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 370.8153381347656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 371.17303466796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 369.2488098144531
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 1 20.0 962.898956888 (11.027107764209074, 10)
loss 373.12689208984375
Current State,action,reward,Response time,Next State:  (10, 11.027107764209074) 3 19.0 962.583328739 (10.995673623987257, 11)
loss 367.0640563964844
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 1 20.0 949.137050055 (10.931193889570471, 10)
loss 369.0047912597656
Current State,action,reward,Response time,Next State:  (10, 10.931193889570471) 3 19.0 957.495664348 (10.816918347608043, 11)
loss 367.8737487792969
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 4 19.0 939.691239608 (10.819208572963639, 11)
loss 370.729248046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 370.578125
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 2 19.0 937.12351295 (10.772009508959538, 11)
loss 368.0039367675781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 368.6190185546875
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 0 21.0 930.602776506 (10.58735855349979, 9)
loss 368.888916015625
Current State,action,reward,Response time,Next State:  (9, 10.58735855349979) 3 20.0 977.313511661 (10.552868829802469, 10)
loss 368.7044677734375
Current State,action,reward,Response time,Next State:  (10, 10.552868829802469) 3 19.0 937.427755072 (10.553846649940214, 11)
loss 368.37738037109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 371.81866455078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 373.22283935546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 370.85186767578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 369.5603332519531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 373.16876220703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 370.73187255859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 370.58306884765625
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 1 20.0 917.051082408 (10.344006106602812, 10)
loss 371.92822265625
Current State,action,reward,Response time,Next State:  (10, 10.344006106602812) 3 19.0 926.348821567 (10.319026962956018, 11)
loss 372.92889404296875
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 4 19.0 913.381595845 (10.30224719189987, 11)
loss 365.166748046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 367.286865234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 372.1110534667969
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 1 20.0 910.69972028 (10.335411397720526, 10)
loss 366.37237548828125
Current State,action,reward,Response time,Next State:  (10, 10.335411397720526) 3 19.0 925.892923039 (10.305649118067803, 11)
loss 369.32647705078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 370.8760070800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 369.9282531738281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 368.2864990234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 370.8118591308594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 372.99676513671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 366.4192810058594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 370.84075927734375
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 1 20.0 914.152581784 (10.390165524255663, 10)
loss 375.2548522949219
Current State,action,reward,Response time,Next State:  (10, 10.390165524255663) 0 22.0 928.797305964 (10.425974763084863, 8)
loss 370.098388671875
Current State,action,reward,Response time,Next State:  (8, 10.425974763084863) 3 21.0 980.387437704 (10.546025383098053, 9)
loss 372.78680419921875
Current State,action,reward,Response time,Next State:  (9, 10.546025383098053) 3 20.0 975.14992417 (10.655373370049301, 10)
loss 367.90826416015625
Current State,action,reward,Response time,Next State:  (10, 10.655373370049301) 1 21.0 942.865015335 (10.624473674922116, 9)
loss 374.61114501953125
Current State,action,reward,Response time,Next State:  (9, 10.624473674922116) 3 20.0 979.256305105 (10.771376986314287, 10)
loss 368.1474304199219
Action +2 not possible so Scaled up by 1
Current State,action,reward,Response time,Next State:  (10, 10.771376986314287) 4 19.0 949.018321829 (10.924797168745895, 11)
loss 375.02435302734375
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 4 19.0 945.391786838 (11.039747673816453, 11)
loss 371.6041564941406
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 0 21.0 951.466016946 (11.271571944085663, 9)
loss 370.3346252441406
Current State,action,reward,Response time,Next State:  (9, 11.271571944085663) 3 20.0 1013.12870607 (11.670334358779868, 10)
loss 371.1779479980469
Current State,action,reward,Response time,Next State:  (10, 11.670334358779868) 3 19.0 996.702699398 (11.819721938468785, 11)
loss 372.2620849609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 372.06219482421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 366.7664489746094
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 0 21.0 1028.70793389 (13.168618569876575, 9)
loss 372.1019287109375
Current State,action,reward,Response time,Next State:  (9, 13.168618569876575) 3 20.0 1112.429735 (13.649658108197247, 10)
loss 374.67498779296875
Current State,action,reward,Response time,Next State:  (10, 13.649658108197247) 1 21.0 1101.69413046 (14.283719188889453, 9)
loss 375.2469177246094
Current State,action,reward,Response time,Next State:  (9, 14.283719188889453) 3 20.0 1170.79974938 (14.677479537099185, 10)
loss 364.94427490234375
Current State,action,reward,Response time,Next State:  (10, 14.677479537099185) 3 19.0 1156.21398489 (15.353965082180355, 11)
loss 367.21844482421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 369.8067932128906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 371.6114196777344
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 0 21.0 1238.24711194 (16.871606159345866, 9)
loss 372.2317199707031
Current State,action,reward,Response time,Next State:  (9, 16.871606159345866) 3 20.0 1306.26286107 (17.534967586021782, 10)
loss 363.56768798828125
Current State,action,reward,Response time,Next State:  (10, 17.534967586021782) 3 19.0 1307.78684385 (17.669285735563751, 11)
loss 370.0430908203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 367.97900390625
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 1 20.0 1316.32685758 (18.385807405229915, 10)
loss 364.8271789550781
Current State,action,reward,Response time,Next State:  (10, 18.385807405229915) 3 19.0 1352.9188695 (18.671267839956315, 11)
loss 371.5947570800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 369.574951171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 369.2795104980469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 370.0181579589844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 371.4700012207031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 368.5006408691406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 368.58416748046875
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 1 20.0 1392.48057747 (19.223969507401588, 10)
loss 370.7300109863281
Current State,action,reward,Response time,Next State:  (10, 19.223969507401588) 3 19.0 1397.37841716 (19.25591252280865, 11)
loss 371.1565246582031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 367.4121398925781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 370.87188720703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 371.4117431640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 366.8490295410156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 364.1046447753906
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 1 20.0 1278.56065924 (16.84211602880065, 10)
loss 366.476318359375
Current State,action,reward,Response time,Next State:  (10, 16.84211602880065) 3 19.0 1271.03516211 (16.237094554670044, 11)
loss 372.1797790527344
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 0 21.0 1226.10492247 (15.950694610794756, 9)
loss 374.54351806640625
Current State,action,reward,Response time,Next State:  (9, 15.950694610794756) 2 21.0 1258.0576862 (15.828704162850809, 9)
loss 367.4247131347656
Current State,action,reward,Response time,Next State:  (9, 15.828704162850809) 0 23.0 1251.67208827 (15.550833128512703, 7)
loss 366.89764404296875
Current State,action,reward,Response time,Next State:  (7, 15.550833128512703) 1 24.0 1344.9062349 (15.446694946204717, 6)
loss 372.2154846191406
Current State,action,reward,Response time,Next State:  (6, 15.446694946204717) 1 25.0 1419.0294644 (15.750501603468638, 5)
loss 367.98126220703125
Current State,action,reward,Response time,Next State:  (5, 15.750501603468638) 0 -74.1203413193 1704.75586919 (15.817158911312735, 3)
loss 366.21826171875
Current State,action,reward,Response time,Next State:  (3, 15.817158911312735) 4 25.0 3011.20341319 (15.829956988360925, 5)
loss 367.5533447265625
Current State,action,reward,Response time,Next State:  (5, 15.829956988360925) 1 26.0 1712.05640643 (15.892373986997768, 4)
loss 371.6924743652344
Current State,action,reward,Response time,Next State:  (4, 15.892373986997768) 1 -76.827224819 1990.51332244 (15.954793861767499, 3)
loss 371.2023010253906
Current State,action,reward,Response time,Next State:  (3, 15.954793861767499) 4 25.0 3038.27224819 (16.004586266677634, 5)
loss 370.23431396484375
Current State,action,reward,Response time,Next State:  (5, 16.004586266677634) 3 24.0 1728.10173237 (16.017694914042416, 6)
loss 423.6172180175781
Current State,action,reward,Response time,Next State:  (6, 16.017694914042416) 3 23.0 1456.7948918 (15.947547279389703, 7)
loss 426.13238525390625
Current State,action,reward,Response time,Next State:  (7, 15.947547279389703) 3 22.0 1369.62816392 (16.11465619633363, 8)
loss 423.1506652832031
Current State,action,reward,Response time,Next State:  (8, 16.11465619633363) 3 21.0 1312.86125789 (16.147078378791146, 9)
loss 370.3408203125
Current State,action,reward,Response time,Next State:  (9, 16.147078378791146) 3 20.0 1268.3374073 (16.229253414601111, 10)
loss 367.58270263671875
Current State,action,reward,Response time,Next State:  (10, 16.229253414601111) 3 19.0 1238.52642122 (16.295120821876548, 11)
loss 373.1107177734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 370.96173095703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 367.2017517089844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 376.67974853515625
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 0 21.0 1258.27117243 (17.052961248403161, 9)
loss 367.6469421386719
Current State,action,reward,Response time,Next State:  (9, 17.052961248403161) 3 20.0 1315.75590499 (17.215992726625572, 10)
loss 424.18231201171875
############ Running episode number: 130  ##############
Action +2 not possible so Scaled up by 1
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 4 19.0 1029.06659875 (11.973514343585284, 11)
loss 370.5433654785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 434.2183837890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 375.7679443359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 371.4619445800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 374.970458984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 374.02197265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 424.4577331542969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 368.81390380859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 369.94085693359375
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 1 20.0 945.729803224 (10.816918347608043, 10)
loss 370.7573547363281
Current State,action,reward,Response time,Next State:  (10, 10.816918347608043) 0 22.0 951.434021987 (10.819208572963639, 8)
loss 364.8005065917969
Current State,action,reward,Response time,Next State:  (8, 10.819208572963639) 0 24.0 1003.36990711 (10.768325938188134, 6)
loss 422.52911376953125
Current State,action,reward,Response time,Next State:  (6, 10.768325938188134) 3 23.0 1109.60632067 (10.772009508959538, 7)
loss 423.18994140625
Current State,action,reward,Response time,Next State:  (7, 10.772009508959538) 3 22.0 1047.1055689 (10.644925616761762, 8)
loss 427.4529113769531
Current State,action,reward,Response time,Next State:  (8, 10.644925616761762) 3 21.0 993.183975462 (10.58735855349979, 9)
loss 373.62445068359375
Current State,action,reward,Response time,Next State:  (9, 10.58735855349979) 3 20.0 977.313511661 (10.552868829802469, 10)
loss 370.9734191894531
Current State,action,reward,Response time,Next State:  (10, 10.552868829802469) 3 19.0 937.427755072 (10.553846649940214, 11)
loss 369.58929443359375
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 0 21.0 925.789969445 (10.489125480251131, 9)
loss 374.69500732421875
Current State,action,reward,Response time,Next State:  (9, 10.489125480251131) 0 23.0 972.171495057 (10.448897752470936, 7)
loss 365.39471435546875
Current State,action,reward,Response time,Next State:  (7, 10.448897752470936) 3 22.0 1026.97030049 (10.433149880183072, 8)
loss 373.91607666015625
Current State,action,reward,Response time,Next State:  (8, 10.433149880183072) 3 21.0 980.806785952 (10.44185150623065, 9)
loss 374.39910888671875
Current State,action,reward,Response time,Next State:  (9, 10.44185150623065) 4 19.0 969.696935814 (10.370942817486826, 11)
loss 366.6924133300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 371.12127685546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 379.9462585449219
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 4 19.0 917.051082408 (10.344006106602812, 11)
loss 427.8932800292969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 378.041748046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 368.4996643066406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 366.8104553222656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 370.4720458984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 371.1517028808594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 420.3594055175781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 428.6177673339844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 377.9836730957031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 375.60205078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 374.1649475097656
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 4 19.0 909.008683798 (10.369891240151098, 11)
loss 371.5813903808594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 428.6021423339844
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 4 19.0 913.272125304 (10.333617326102203, 11)
loss 369.6826171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 366.0626525878906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 373.5361328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 371.4039306640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 373.5805969238281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 375.52239990234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 430.2513427734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 371.824951171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 376.0509338378906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 369.6558532714844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 368.7893371582031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 373.7318115234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 426.4878234863281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 369.3294677734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 370.12738037109375
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 1 20.0 1063.96010023 (13.649658108197247, 10)
loss 375.1319580078125
Current State,action,reward,Response time,Next State:  (10, 13.649658108197247) 3 19.0 1101.69413046 (14.283719188889453, 11)
loss 378.0469970703125
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 0 21.0 1122.88439768 (14.677479537099185, 9)
loss 371.64166259765625
Current State,action,reward,Response time,Next State:  (9, 14.677479537099185) 3 20.0 1191.41116041 (15.353965082180355, 10)
loss 369.6463928222656
Current State,action,reward,Response time,Next State:  (10, 15.353965082180355) 2 20.0 1192.09754638 (15.836943704090487, 10)
loss 373.5335388183594
Action +2 not possible so Scaled up by 1
Current State,action,reward,Response time,Next State:  (10, 15.836943704090487) 4 19.0 1217.71670884 (16.466876895473597, 11)
loss 373.1192321777344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 429.1952819824219
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 0 21.0 1259.63387034 (17.534967586021782, 9)
loss 427.1211853027344
Current State,action,reward,Response time,Next State:  (9, 17.534967586021782) 3 20.0 1340.98655806 (17.669285735563751, 10)
loss 371.6418151855469
Current State,action,reward,Response time,Next State:  (10, 17.669285735563751) 3 19.0 1314.91162813 (17.944480812078613, 11)
loss 373.93609619140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 375.15087890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 369.2091369628906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 375.1357727050781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 380.90386962890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 369.958984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 428.76226806640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 376.8847351074219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 378.42279052734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 378.6915283203125
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 1 20.0 1383.93777195 (19.25591252280865, 10)
loss 376.7031555175781
Current State,action,reward,Response time,Next State:  (10, 19.25591252280865) 3 19.0 1399.0728054 (19.08360399753829, 11)
loss 424.54461669921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 424.0113830566406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 424.0184631347656
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 2 19.0 1339.12370397 (17.82724819986867, 11)
loss 370.4840087890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 380.17108154296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 373.4447021484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 370.9843444824219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 374.5932922363281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 371.2997131347656
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 2 19.0 1204.52470225 (15.550833128512703, 11)
loss 431.7988586425781
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 0 21.0 1189.84140354 (15.446694946204717, 9)
loss 431.67327880859375
Current State,action,reward,Response time,Next State:  (9, 15.446694946204717) 1 22.0 1231.67579099 (15.750501603468638, 8)
loss 373.7962646484375
Current State,action,reward,Response time,Next State:  (8, 15.750501603468638) 3 21.0 1291.57831736 (15.817158911312735, 9)
loss 378.1956481933594
Current State,action,reward,Response time,Next State:  (9, 15.817158911312735) 3 20.0 1251.06775133 (15.829956988360925, 10)
loss 372.12335205078125
Current State,action,reward,Response time,Next State:  (10, 15.829956988360925) 3 19.0 1217.34610485 (15.892373986997768, 11)
loss 376.1881103515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 370.66143798828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 373.1873474121094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 371.6715087890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 374.0484924316406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 379.99493408203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 426.1705017089844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 373.5760803222656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 367.5111389160156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 376.4572448730469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 430.0837097167969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 425.1705322265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 430.30572509765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 377.6050109863281
############ Running episode number: 131  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 375.3154602050781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 370.4788818359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 427.2108154296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 370.76092529296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 382.1500549316406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 371.0771179199219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 379.4213562011719
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 0 21.0 950.798097136 (10.995673623987257, 9)
loss 371.4279479980469
Current State,action,reward,Response time,Next State:  (9, 10.995673623987257) 3 20.0 998.686790566 (10.931193889570471, 10)
loss 371.96636962890625
Current State,action,reward,Response time,Next State:  (10, 10.931193889570471) 3 19.0 957.495664348 (10.816918347608043, 11)
loss 369.7829284667969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 370.1101989746094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 370.9577941894531
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 1 20.0 937.12351295 (10.772009508959538, 10)
loss 376.9167175292969
Current State,action,reward,Response time,Next State:  (10, 10.772009508959538) 3 19.0 949.051873418 (10.644925616761762, 11)
loss 382.9443664550781
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 4 19.0 930.602776506 (10.58735855349979, 11)
loss 369.56561279296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 369.2062683105469
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 2 19.0 925.738299342 (10.553846649940214, 11)
loss 376.6667785644531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 369.48468017578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 372.49957275390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 434.2468566894531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 370.384033203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 381.37286376953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 369.5949401855469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 366.622314453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 375.1678161621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 371.0689392089844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 370.0519104003906
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 0 21.0 912.494916918 (10.278181486298042, 9)
loss 425.4682922363281
Current State,action,reward,Response time,Next State:  (9, 10.278181486298042) 0 23.0 961.129617982 (10.268274366284802, 7)
loss 373.7445373535156
Current State,action,reward,Response time,Next State:  (7, 10.268274366284802) 3 22.0 1015.71444152 (10.335411397720526, 8)
loss 373.8222961425781
Current State,action,reward,Response time,Next State:  (8, 10.335411397720526) 0 24.0 975.09448038 (10.305649118067803, 6)
loss 377.50872802734375
Current State,action,reward,Response time,Next State:  (6, 10.305649118067803) 0 26.0 1079.00528942 (10.24826025489064, 4)
loss 372.0107727050781
Current State,action,reward,Response time,Next State:  (4, 10.24826025489064) 0 -57.6997044233 1349.31722482 (10.276491935146446, 2)
loss 373.1571350097656
Current State,action,reward,Response time,Next State:  (2, 10.276491935146446) 3 27.0 2856.99704423 (10.236991269871366, 3)
loss 457.7830810546875
Current State,action,reward,Response time,Next State:  (3, 10.236991269871366) 2 27.0 1913.74497954 (10.236272697871373, 3)
loss 371.37664794921875
Current State,action,reward,Response time,Next State:  (3, 10.236272697871373) 3 26.0 1913.60365711 (10.369891240151098, 4)
loss 382.4875793457031
Current State,action,reward,Response time,Next State:  (4, 10.369891240151098) 3 25.0 1363.1350387 (10.316955310454549, 5)
loss 423.2239685058594
Current State,action,reward,Response time,Next State:  (5, 10.316955310454549) 3 24.0 1205.50956788 (10.333617326102203, 6)
loss 381.6222839355469
Current State,action,reward,Response time,Next State:  (6, 10.333617326102203) 3 23.0 1080.85508169 (10.390165524255663, 7)
loss 381.76287841796875
Current State,action,reward,Response time,Next State:  (7, 10.390165524255663) 3 22.0 1023.31030002 (10.425974763084863, 8)
loss 407.2366027832031
Current State,action,reward,Response time,Next State:  (8, 10.425974763084863) 3 21.0 980.387437704 (10.546025383098053, 9)
loss 368.3106689453125
Current State,action,reward,Response time,Next State:  (9, 10.546025383098053) 3 20.0 975.14992417 (10.655373370049301, 10)
loss 429.3772277832031
Current State,action,reward,Response time,Next State:  (10, 10.655373370049301) 3 19.0 942.865015335 (10.624473674922116, 11)
loss 382.72247314453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 402.5102233886719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 378.2975769042969
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 1 20.0 945.391786838 (11.039747673816453, 10)
loss 426.67010498046875
Current State,action,reward,Response time,Next State:  (10, 11.039747673816453) 3 19.0 963.253801267 (11.271571944085663, 11)
loss 373.03717041015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 439.7118225097656
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 4 19.0 984.787563682 (11.819721938468785, 11)
loss 368.9996337890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 382.59173583984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 379.2870788574219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 375.6505432128906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 372.69537353515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 375.7032165527344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 372.5263671875
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 1 20.0 1143.69153516 (15.353965082180355, 10)
loss 421.7972412109375
Action +2 not possible so Scaled up by 1
Current State,action,reward,Response time,Next State:  (10, 15.353965082180355) 4 19.0 1192.09754638 (15.836943704090487, 11)
loss 436.37225341796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 374.6665344238281
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 1 20.0 1238.24711194 (16.871606159345866, 10)
loss 429.5835266113281
Current State,action,reward,Response time,Next State:  (10, 16.871606159345866) 0 22.0 1272.5994393 (17.534967586021782, 8)
loss 374.4761657714844
Current State,action,reward,Response time,Next State:  (8, 17.534967586021782) 3 21.0 1395.8710659 (17.669285735563751, 9)
loss 409.4880065917969
Current State,action,reward,Response time,Next State:  (9, 17.669285735563751) 3 20.0 1348.01745033 (17.944480812078613, 10)
loss 378.1576232910156
Current State,action,reward,Response time,Next State:  (10, 17.944480812078613) 0 22.0 1329.50910109 (18.385807405229915, 8)
loss 375.4105224609375
Current State,action,reward,Response time,Next State:  (8, 18.385807405229915) 3 21.0 1445.59822471 (18.671267839956315, 9)
loss 368.1873474121094
Current State,action,reward,Response time,Next State:  (9, 18.671267839956315) 0 23.0 1400.46626871 (19.02839494033929, 7)
loss 375.0975341796875
Current State,action,reward,Response time,Next State:  (7, 19.02839494033929) 3 22.0 1561.61651886 (19.286321916040979, 8)
loss 380.27203369140625
Current State,action,reward,Response time,Next State:  (8, 19.286321916040979) 3 21.0 1498.22861069 (19.340464848017284, 9)
loss 407.430908203125
Current State,action,reward,Response time,Next State:  (9, 19.340464848017284) 4 19.0 1435.4954296 (19.213467265587269, 11)
loss 427.8915710449219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 426.1997985839844
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 0 21.0 1379.54110953 (19.385636054792762, 9)
loss 376.8933410644531
Current State,action,reward,Response time,Next State:  (9, 19.385636054792762) 3 20.0 1437.85991935 (19.223969507401588, 10)
loss 488.28411865234375
Current State,action,reward,Response time,Next State:  (10, 19.223969507401588) 3 19.0 1397.37841716 (19.25591252280865, 11)
loss 428.23687744140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 375.2940673828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 439.94677734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 428.2676086425781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 375.9049072265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 433.4944763183594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 426.7907409667969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 375.7436828613281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 377.88201904296875
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 4 19.0 1210.97093797 (15.828704162850809, 11)
loss 382.3509216308594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 371.6592102050781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 407.41357421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 384.3389892578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 374.2833251953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 369.8237609863281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 379.98828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 376.4535217285156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 378.87969970703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 377.0946960449219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 379.61004638671875
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 0 21.0 1210.80462626 (16.11465619633363, 9)
loss 383.551025390625
Current State,action,reward,Response time,Next State:  (9, 16.11465619633363) 3 20.0 1266.64026605 (16.147078378791146, 10)
loss 375.7342529296875
Current State,action,reward,Response time,Next State:  (10, 16.147078378791146) 3 19.0 1234.16752106 (16.229253414601111, 11)
loss 374.310791015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 376.96307373046875
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 4 19.0 1229.17115431 (16.667936385136993, 11)
loss 430.4978942871094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 426.3304138183594
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 4 19.0 1257.77263112 (16.845818065953559, 11)
loss 426.8084411621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 383.1800231933594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 372.3862609863281
############ Running episode number: 132  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 374.30035400390625
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 4 19.0 1000.80824137 (11.786394321941378, 11)
loss 433.8919677734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 430.4329528808594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 372.2709045410156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 376.895751953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 372.4736328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 402.8422546386719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 378.5006103515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 372.02825927734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 373.25152587890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 432.740478515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 373.587890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 433.0744934082031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 372.0168151855469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 368.9502258300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 376.49444580078125
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 0 21.0 925.738299342 (10.553846649940214, 9)
loss 377.6198425292969
Current State,action,reward,Response time,Next State:  (9, 10.553846649940214) 3 20.0 975.559328891 (10.489125480251131, 10)
loss 428.4978332519531
Current State,action,reward,Response time,Next State:  (10, 10.489125480251131) 3 19.0 934.046546974 (10.448897752470936, 11)
loss 376.8988342285156
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 4 19.0 920.244245637 (10.433149880183072, 11)
loss 383.1959228515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 370.7437438964844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 372.7004089355469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 383.60003662109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 438.1676330566406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 378.62579345703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 425.1098937988281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 375.676025390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 390.1508483886719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 383.3376159667969
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 1 20.0 910.69972028 (10.335411397720526, 10)
loss 377.6745300292969
Current State,action,reward,Response time,Next State:  (10, 10.335411397720526) 3 19.0 925.892923039 (10.305649118067803, 11)
loss 373.3822937011719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 380.21246337890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 373.5108642578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 373.5925598144531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 373.9399108886719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 375.9840087890625
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 0 21.0 916.069372847 (10.316955310454549, 9)
loss 376.4181823730469
Current State,action,reward,Response time,Next State:  (9, 10.316955310454549) 3 20.0 963.159236328 (10.333617326102203, 10)
loss 384.585205078125
Current State,action,reward,Response time,Next State:  (10, 10.333617326102203) 0 22.0 925.797758139 (10.390165524255663, 8)
loss 426.93310546875
Current State,action,reward,Response time,Next State:  (8, 10.390165524255663) 3 21.0 978.294574081 (10.425974763084863, 9)
loss 420.5918884277344
Current State,action,reward,Response time,Next State:  (9, 10.425974763084863) 3 20.0 968.865866662 (10.546025383098053, 10)
loss 369.4984130859375
Current State,action,reward,Response time,Next State:  (10, 10.546025383098053) 3 19.0 937.064750655 (10.655373370049301, 11)
loss 379.4955749511719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 372.1723937988281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 430.51043701171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 409.7946472167969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 373.3323059082031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 424.8201599121094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 378.99017333984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 430.2373962402344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 375.2339172363281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 384.0118713378906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 379.73162841796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 404.0622863769531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 372.5615234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 376.9065246582031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 369.07861328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 369.6834716796875
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 4 19.0 1204.9600972 (16.466876895473597, 11)
loss 429.2319030761719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 430.779052734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 373.236083984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 377.53497314453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 455.79986572265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 383.8035583496094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 381.29754638671875
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 2 19.0 1354.73183582 (19.02839494033929, 11)
loss 373.4954833984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 372.14501953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 376.499755859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 379.9660949707031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 434.12548828125
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 2 19.0 1379.54110953 (19.385636054792762, 11)
loss 438.37921142578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 460.43426513671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 371.1800231933594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 382.2366638183594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 380.44921875
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 2 19.0 1354.56874896 (18.375894992990247, 11)
loss 378.0387268066406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 372.65045166015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 381.2328186035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 383.7795715332031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 382.51580810546875
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 0 21.0 1226.10492247 (15.950694610794756, 9)
loss 375.8019714355469
Current State,action,reward,Response time,Next State:  (9, 15.950694610794756) 1 22.0 1258.0576862 (15.828704162850809, 8)
loss 378.42047119140625
Current State,action,reward,Response time,Next State:  (8, 15.828704162850809) 3 21.0 1296.14884991 (15.550833128512703, 9)
loss 379.5353088378906
Current State,action,reward,Response time,Next State:  (9, 15.550833128512703) 2 21.0 1237.12691092 (15.446694946204717, 9)
loss 375.6322326660156
Current State,action,reward,Response time,Next State:  (9, 15.446694946204717) 3 20.0 1231.67579099 (15.750501603468638, 10)
loss 373.99700927734375
Current State,action,reward,Response time,Next State:  (10, 15.750501603468638) 3 19.0 1213.1314661 (15.817158911312735, 11)
loss 372.5617980957031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 382.8100280761719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 379.1526794433594
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 2 19.0 1207.88915169 (15.954793861767499, 11)
loss 381.1603088378906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 379.7689514160156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 379.5020446777344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 409.0799560546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 376.2452087402344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 442.3048400878906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 373.6343688964844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 380.6914978027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 376.68402099609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 380.5581970214844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 377.7366638183594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 375.6868591308594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 377.9068603515625
############ Running episode number: 133  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 368.61395263671875
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 0 21.0 1000.80824137 (11.786394321941378, 9)
loss 379.7813415527344
Current State,action,reward,Response time,Next State:  (9, 11.786394321941378) 3 20.0 1040.0771169 (11.61852219546234, 10)
loss 370.772216796875
Current State,action,reward,Response time,Next State:  (10, 11.61852219546234) 3 19.0 993.95437024 (11.469111876584304, 11)
loss 421.95941162109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 384.48773193359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 378.9724426269531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 389.0787048339844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 372.26300048828125
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 1 20.0 949.137050055 (10.931193889570471, 10)
loss 380.052490234375
Current State,action,reward,Response time,Next State:  (10, 10.931193889570471) 3 19.0 957.495664348 (10.816918347608043, 11)
loss 432.1337585449219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 382.72222900390625
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 2 19.0 939.812260006 (10.768325938188134, 11)
loss 383.5198669433594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 438.0295104980469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 371.29547119140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 380.3883056640625
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 4 19.0 927.560809977 (10.552868829802469, 11)
loss 426.5878601074219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 424.3670959472656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 381.4788513183594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 375.0291748046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 374.9078674316406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 381.36407470703125
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 1 20.0 919.871906942 (10.370942817486826, 10)
loss 380.3394470214844
Current State,action,reward,Response time,Next State:  (10, 10.370942817486826) 3 19.0 927.777654938 (10.42733414151318, 11)
loss 376.9498291015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 427.3528747558594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 430.4544372558594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 434.67889404296875
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 2 19.0 913.381595845 (10.30224719189987, 11)
loss 378.50689697265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 380.793701171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 415.58685302734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 371.4281921386719
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 4 19.0 914.247384359 (10.305649118067803, 11)
loss 381.3927001953125
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 1 20.0 912.67468196 (10.24826025489064, 10)
loss 379.7335510253906
Current State,action,reward,Response time,Next State:  (10, 10.24826025489064) 3 19.0 921.2700698 (10.276491935146446, 11)
loss 377.47576904296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 422.6040954589844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 406.4169006347656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 373.06689453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 371.9495544433594
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 2 19.0 913.272125304 (10.333617326102203, 11)
loss 426.00238037109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 377.53125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 406.8417663574219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 377.3979797363281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 376.959228515625
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 4 19.0 931.154858096 (10.624473674922116, 11)
loss 376.80450439453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 377.35198974609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 378.82366943359375
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 4 19.0 945.391786838 (11.039747673816453, 11)
loss 436.6117248535156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 388.1506652832031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 379.49530029296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 370.41070556640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 373.1455078125
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 2 19.0 1012.73322757 (12.501496275411796, 11)
loss 430.4288024902344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 377.5943908691406
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 1 20.0 1063.96010023 (13.649658108197247, 10)
loss 381.89862060546875
Current State,action,reward,Response time,Next State:  (10, 13.649658108197247) 3 19.0 1101.69413046 (14.283719188889453, 11)
loss 406.40875244140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 380.92724609375
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 0 21.0 1143.69153516 (15.353965082180355, 9)
loss 372.0484313964844
Current State,action,reward,Response time,Next State:  (9, 15.353965082180355) 3 20.0 1226.82184023 (15.836943704090487, 10)
loss 372.0345153808594
Current State,action,reward,Response time,Next State:  (10, 15.836943704090487) 3 19.0 1217.71670884 (16.466876895473597, 11)
loss 375.7866516113281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 377.39190673828125
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 1 20.0 1259.63387034 (17.534967586021782, 10)
loss 379.7510986328125
Current State,action,reward,Response time,Next State:  (10, 17.534967586021782) 3 19.0 1307.78684385 (17.669285735563751, 11)
loss 381.3310852050781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 381.2767639160156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 376.4469909667969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 379.107666015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 373.023681640625
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 1 20.0 1373.60319427 (19.286321916040979, 10)
loss 382.2121276855469
Current State,action,reward,Response time,Next State:  (10, 19.286321916040979) 3 19.0 1400.68584406 (19.340464848017284, 11)
loss 436.9955749511719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 376.6408386230469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 456.3468933105469
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 4 19.0 1379.54110953 (19.385636054792762, 11)
loss 371.0760803222656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 377.1077880859375
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 0 21.0 1383.93777195 (19.25591252280865, 9)
loss 432.9115905761719
Current State,action,reward,Response time,Next State:  (9, 19.25591252280865) 3 20.0 1431.06953264 (19.08360399753829, 10)
loss 379.4901428222656
Current State,action,reward,Response time,Next State:  (10, 19.08360399753829) 3 19.0 1389.93285614 (18.668181536495972, 11)
loss 377.8805847167969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 376.9689025878906
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 2 19.0 1339.12370397 (17.82724819986867, 11)
loss 376.4847717285156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 379.73248291015625
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 0 21.0 1278.56065924 (16.84211602880065, 9)
loss 374.8896789550781
Current State,action,reward,Response time,Next State:  (9, 16.84211602880065) 3 20.0 1304.71919827 (16.237094554670044, 10)
loss 368.56610107421875
Current State,action,reward,Response time,Next State:  (10, 16.237094554670044) 0 22.0 1238.94234737 (15.950694610794756, 8)
loss 371.1498107910156
Current State,action,reward,Response time,Next State:  (8, 15.950694610794756) 3 21.0 1303.27855664 (15.828704162850809, 9)
loss 401.6357727050781
Current State,action,reward,Response time,Next State:  (9, 15.828704162850809) 3 20.0 1251.67208827 (15.550833128512703, 10)
loss 404.92413330078125
Current State,action,reward,Response time,Next State:  (10, 15.550833128512703) 3 19.0 1202.54023315 (15.446694946204717, 11)
loss 485.8469543457031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 484.78033447265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 456.6954040527344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 378.72064208984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 373.87103271484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 377.72540283203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 428.1242370605469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 370.45135498046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 373.0460510253906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 379.9550476074219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 374.77398681640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 385.16156005859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 401.4201354980469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 382.6844787597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 367.96124267578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 474.30462646484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 377.1163330078125
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 4 19.0 1269.21706044 (17.215992726625572, 11)
loss 366.6379089355469
############ Running episode number: 134  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 430.5471496582031
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 2 19.0 1000.80824137 (11.786394321941378, 11)
loss 431.9156188964844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 378.7006530761719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 462.7955017089844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 380.750732421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 430.5886535644531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 373.43450927734375
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 2 19.0 950.798097136 (10.995673623987257, 11)
loss 429.90130615234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 376.8020324707031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 434.64453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 436.0893249511719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 380.0970458984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 368.7581481933594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 371.89306640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 372.8521423339844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 377.30816650390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 378.9767150878906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 378.20050048828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 376.3329162597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 379.2348327636719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 377.1609802246094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 376.976806640625
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 2 19.0 916.124940439 (10.42733414151318, 11)
loss 379.0259704589844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 432.677978515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 435.6424255371094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 368.51422119140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 388.2352600097656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 371.84686279296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 378.2908935546875
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 1 20.0 910.69972028 (10.335411397720526, 10)
loss 383.44287109375
Current State,action,reward,Response time,Next State:  (10, 10.335411397720526) 3 19.0 925.892923039 (10.305649118067803, 11)
loss 378.36395263671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 375.44854736328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 434.853271484375
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 1 20.0 911.133954163 (10.236991269871366, 10)
loss 438.7665100097656
Current State,action,reward,Response time,Next State:  (10, 10.236991269871366) 3 19.0 920.672316722 (10.236272697871373, 11)
loss 427.6571350097656
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 1 20.0 909.008683798 (10.369891240151098, 10)
loss 376.79278564453125
Current State,action,reward,Response time,Next State:  (10, 10.369891240151098) 3 19.0 927.721874973 (10.316955310454549, 11)
loss 382.36590576171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 430.4373779296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 382.9289855957031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 386.59869384765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 369.56317138671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 381.5220947265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 431.8979187011719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 461.7867431640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 373.5257263183594
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 4 19.0 945.391786838 (11.039747673816453, 11)
loss 378.4170837402344
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 0 21.0 951.466016946 (11.271571944085663, 9)
loss 428.3008728027344
Current State,action,reward,Response time,Next State:  (9, 11.271571944085663) 3 20.0 1013.12870607 (11.670334358779868, 10)
loss 435.1881408691406
Current State,action,reward,Response time,Next State:  (10, 11.670334358779868) 3 19.0 996.702699398 (11.819721938468785, 11)
loss 375.8871154785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 381.36279296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 426.67852783203125
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 1 20.0 1028.70793389 (13.168618569876575, 10)
loss 374.53961181640625
Current State,action,reward,Response time,Next State:  (10, 13.168618569876575) 3 19.0 1076.17782493 (13.649658108197247, 11)
loss 423.35552978515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 378.0232849121094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 408.33673095703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 431.34222412109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 376.37139892578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 380.900634765625
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 1 20.0 1238.24711194 (16.871606159345866, 10)
loss 371.268310546875
Current State,action,reward,Response time,Next State:  (10, 16.871606159345866) 3 19.0 1272.5994393 (17.534967586021782, 11)
loss 379.7297668457031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 374.9624938964844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 429.9623718261719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 375.29205322265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 510.5844421386719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 374.0352478027344
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 4 19.0 1373.60319427 (19.286321916040979, 11)
loss 381.4947204589844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 374.7774658203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 374.0579833984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 376.8387145996094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 384.54248046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 372.0080871582031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 368.9806823730469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 427.1280822753906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 369.4169006347656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 409.6087341308594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 369.1333923339844
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 2 19.0 1310.13203606 (17.229782241685768, 11)
loss 436.1480407714844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 380.4561462402344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 385.3328552246094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 430.9738464355469
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 4 19.0 1210.97093797 (15.828704162850809, 11)
loss 372.547119140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 372.50482177734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 421.7569885253906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 368.7159118652344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 436.7794494628906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 370.6939392089844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 368.5046691894531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 371.2891540527344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 375.6108093261719
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 1 20.0 1213.81868812 (16.017694914042416, 10)
loss 383.4368591308594
Current State,action,reward,Response time,Next State:  (10, 16.017694914042416) 3 19.0 1227.30449265 (15.947547279389703, 11)
loss 367.40936279296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 371.9654541015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 378.1177978515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 379.9510498046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 372.60626220703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 376.1214904785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 372.1400451660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 386.3599548339844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 379.81781005859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 430.9407653808594
############ Running episode number: 135  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 368.8369445800781
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 4 19.0 1000.80824137 (11.786394321941378, 11)
loss 389.0003967285156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 369.51739501953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 402.88262939453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 382.44366455078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 371.9382629394531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 377.83367919921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 425.4155578613281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 377.7887268066406
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 1 20.0 945.729803224 (10.816918347608043, 10)
loss 460.8652038574219
Action +2 not possible so Scaled up by 1
Current State,action,reward,Response time,Next State:  (10, 10.816918347608043) 4 19.0 951.434021987 (10.819208572963639, 11)
loss 374.5487976074219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 387.8787841796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 381.28900146484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 405.1460266113281
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 1 20.0 930.602776506 (10.58735855349979, 10)
loss 404.5738830566406
Current State,action,reward,Response time,Next State:  (10, 10.58735855349979) 3 19.0 939.257231149 (10.552868829802469, 11)
loss 370.3692626953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 459.11981201171875
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 2 19.0 925.789969445 (10.489125480251131, 11)
loss 379.0111083984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 371.7121276855469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 402.5863342285156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 434.0834655761719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 379.1437683105469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 372.4607849121094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 429.81085205078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 367.9498291015625
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 2 19.0 914.701547126 (10.319026962956018, 11)
loss 379.977783203125
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 1 20.0 913.381595845 (10.30224719189987, 10)
loss 435.349365234375
Current State,action,reward,Response time,Next State:  (10, 10.30224719189987) 3 19.0 924.133757854 (10.278181486298042, 11)
loss 372.73040771484375
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 4 19.0 911.223233653 (10.268274366284802, 11)
loss 458.2490539550781
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 1 20.0 910.69972028 (10.335411397720526, 10)
loss 380.85980224609375
Current State,action,reward,Response time,Next State:  (10, 10.335411397720526) 2 20.0 925.892923039 (10.305649118067803, 10)
loss 457.4536437988281
Action +2 not possible so Scaled up by 1
Current State,action,reward,Response time,Next State:  (10, 10.305649118067803) 4 19.0 924.314209939 (10.24826025489064, 11)
loss 373.9531555175781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 376.66558837890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 375.5065612792969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 381.8993225097656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 425.8584899902344
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 0 21.0 916.069372847 (10.316955310454549, 9)
loss 371.2940979003906
Current State,action,reward,Response time,Next State:  (9, 10.316955310454549) 3 20.0 963.159236328 (10.333617326102203, 10)
loss 375.8801574707031
Current State,action,reward,Response time,Next State:  (10, 10.333617326102203) 1 21.0 925.797758139 (10.390165524255663, 9)
loss 374.8747253417969
Current State,action,reward,Response time,Next State:  (9, 10.390165524255663) 3 20.0 966.991429728 (10.425974763084863, 10)
loss 431.1628112792969
Current State,action,reward,Response time,Next State:  (10, 10.425974763084863) 3 19.0 930.696774523 (10.546025383098053, 11)
loss 380.6310729980469
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 1 20.0 925.376677007 (10.655373370049301, 10)
loss 379.3575134277344
Current State,action,reward,Response time,Next State:  (10, 10.655373370049301) 3 19.0 942.865015335 (10.624473674922116, 11)
loss 382.58575439453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 430.08355712890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 380.17242431640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 377.835693359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 452.67437744140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 375.84588623046875
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 4 19.0 984.787563682 (11.819721938468785, 11)
loss 432.02825927734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 423.7247314453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 434.44403076171875
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 2 19.0 1028.70793389 (13.168618569876575, 11)
loss 374.01190185546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 427.3347473144531
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 1 20.0 1089.37925646 (14.283719188889453, 10)
loss 462.1495666503906
Current State,action,reward,Response time,Next State:  (10, 14.283719188889453) 3 19.0 1135.32732476 (14.677479537099185, 11)
loss 370.82208251953125
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 4 19.0 1143.69153516 (15.353965082180355, 11)
loss 379.93731689453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 377.6680603027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 378.2489929199219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 427.12689208984375
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 0 21.0 1259.63387034 (17.534967586021782, 9)
loss 378.493408203125
Current State,action,reward,Response time,Next State:  (9, 17.534967586021782) 3 20.0 1340.98655806 (17.669285735563751, 10)
loss 368.6268005371094
Current State,action,reward,Response time,Next State:  (10, 17.669285735563751) 0 22.0 1314.91162813 (17.944480812078613, 8)
loss 375.93408203125
Current State,action,reward,Response time,Next State:  (8, 17.944480812078613) 3 21.0 1419.80498244 (18.385807405229915, 9)
loss 392.7796325683594
Current State,action,reward,Response time,Next State:  (9, 18.385807405229915) 2 21.0 1385.5238237 (18.671267839956315, 9)
loss 394.157958984375
Current State,action,reward,Response time,Next State:  (9, 18.671267839956315) 3 20.0 1400.46626871 (19.02839494033929, 10)
loss 373.04388427734375
Current State,action,reward,Response time,Next State:  (10, 19.02839494033929) 3 19.0 1387.00434183 (19.286321916040979, 11)
loss 374.87548828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 371.6128845214844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 372.8893127441406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 380.2723693847656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 380.0128173828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 371.1883239746094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 370.9326477050781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 374.1105041503906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 477.284912109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 371.3255310058594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 383.0891418457031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 389.3543395996094
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 4 19.0 1278.56065924 (16.84211602880065, 11)
loss 375.9092712402344
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 4 19.0 1258.07554888 (16.237094554670044, 11)
loss 396.551025390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 383.4336853027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 433.66583251953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 370.37457275390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 365.6913146972656
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 2 19.0 1184.33851965 (15.750501603468638, 11)
loss 461.67376708984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 371.6075439453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 374.7099304199219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 454.1273498535156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 455.50311279296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 381.7269287109375
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 4 19.0 1213.81868812 (16.017694914042416, 11)
loss 376.0770263671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 376.3712463378906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 373.78228759765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 377.3043212890625
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 2 19.0 1221.34827555 (16.229253414601111, 11)
loss 376.3549499511719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 371.1224670410156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 377.005126953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 433.61224365234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 373.2236022949219
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 2 19.0 1258.27117243 (17.052961248403161, 11)
loss 433.77667236328125
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 2 19.0 1269.21706044 (17.215992726625572, 11)
loss 373.5740966796875
############ Running episode number: 136  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 373.42889404296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 374.71795654296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 375.4926452636719
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 0 21.0 982.049698353 (11.469111876584304, 9)
loss 436.01434326171875
Current State,action,reward,Response time,Next State:  (9, 11.469111876584304) 3 20.0 1023.46894667 (11.336751742492702, 10)
loss 406.0755615234375
Current State,action,reward,Response time,Next State:  (10, 11.336751742492702) 3 19.0 979.00811241 (11.25610796929319, 11)
loss 487.6521301269531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 368.4945373535156
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 1 20.0 950.798097136 (10.995673623987257, 10)
loss 377.30908203125
Current State,action,reward,Response time,Next State:  (10, 10.995673623987257) 1 21.0 960.915933313 (10.931193889570471, 9)
loss 371.7867126464844
Current State,action,reward,Response time,Next State:  (9, 10.931193889570471) 4 19.0 995.311594677 (10.816918347608043, 11)
loss 378.0660400390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 380.14697265625
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 2 19.0 939.812260006 (10.768325938188134, 11)
loss 372.068359375
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 2 19.0 937.12351295 (10.772009508959538, 11)
loss 424.72076416015625
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 4 19.0 937.318160694 (10.644925616761762, 11)
loss 377.7117614746094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 399.30023193359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 379.7613525390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 378.3134460449219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 373.6396789550781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 375.6361999511719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 420.24871826171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 368.2682189941406
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 4 19.0 919.871906942 (10.370942817486826, 11)
loss 423.5030517578125
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 4 19.0 916.124940439 (10.42733414151318, 11)
loss 381.5481262207031
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 1 20.0 919.104778396 (10.388469398680568, 10)
loss 371.139404296875
Current State,action,reward,Response time,Next State:  (10, 10.388469398680568) 3 19.0 928.707336523 (10.344006106602812, 11)
loss 368.59454345703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 436.53179931640625
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 2 19.0 913.381595845 (10.30224719189987, 11)
loss 380.82958984375
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 1 20.0 912.494916918 (10.278181486298042, 10)
loss 426.38671875
Current State,action,reward,Response time,Next State:  (10, 10.278181486298042) 3 19.0 922.857214352 (10.268274366284802, 11)
loss 370.6994934082031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 371.6181945800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 397.7580871582031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 375.9373779296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 409.4094543457031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 374.5650939941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 432.74237060546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 423.6063232421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 373.0513610839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 377.6153869628906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 368.8849182128906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 370.0434265136719
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 1 20.0 919.032945938 (10.546025383098053, 10)
loss 378.48077392578125
Current State,action,reward,Response time,Next State:  (10, 10.546025383098053) 3 19.0 937.064750655 (10.655373370049301, 11)
loss 371.29815673828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 425.57928466796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 384.69549560546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 373.43060302734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 376.13592529296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 385.2430114746094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 409.12811279296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 372.9242248535156
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 4 19.0 992.681522335 (12.19918626616789, 11)
loss 433.7469177246094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 375.316650390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 378.27716064453125
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 2 19.0 1063.96010023 (13.649658108197247, 11)
loss 371.0711669921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 370.3157653808594
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 4 19.0 1122.88439768 (14.677479537099185, 11)
loss 378.41192626953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 423.7333679199219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 370.4887390136719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 377.30633544921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 365.3655090332031
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 2 19.0 1259.63387034 (17.534967586021782, 11)
loss 373.6124572753906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 429.457763671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 373.5836181640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 379.7108154296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 367.7913818359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 377.3232116699219
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 1 20.0 1373.60319427 (19.286321916040979, 10)
loss 401.9183654785156
Action +2 not possible so Scaled up by 1
Current State,action,reward,Response time,Next State:  (10, 19.286321916040979) 4 19.0 1400.68584406 (19.340464848017284, 11)
loss 377.7264404296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 372.9145202636719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 388.3221435546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 375.8563232421875
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 1 20.0 1392.48057747 (19.223969507401588, 10)
loss 408.42474365234375
Current State,action,reward,Response time,Next State:  (10, 19.223969507401588) 0 22.0 1397.37841716 (19.25591252280865, 8)
loss 433.89117431640625
Current State,action,reward,Response time,Next State:  (8, 19.25591252280865) 3 21.0 1496.45133993 (19.08360399753829, 9)
loss 377.25555419921875
Current State,action,reward,Response time,Next State:  (9, 19.08360399753829) 1 22.0 1422.05003169 (18.668181536495972, 8)
loss 378.67822265625
Current State,action,reward,Response time,Next State:  (8, 18.668181536495972) 3 21.0 1462.10152292 (18.375894992990247, 9)
loss 369.1622619628906
Current State,action,reward,Response time,Next State:  (9, 18.375894992990247) 3 20.0 1385.00495784 (17.82724819986867, 10)
loss 372.4027099609375
Current State,action,reward,Response time,Next State:  (10, 17.82724819986867) 3 19.0 1323.29060362 (17.229782241685768, 11)
loss 380.8283386230469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 372.7166442871094
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 4 19.0 1258.07554888 (16.237094554670044, 11)
loss 427.7679138183594
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 4 19.0 1226.10492247 (15.950694610794756, 11)
loss 377.0155029296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 373.48175048828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 372.3578796386719
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 2 19.0 1189.84140354 (15.446694946204717, 11)
loss 373.1837158203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 378.2042236328125
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 4 19.0 1200.39231205 (15.817158911312735, 11)
loss 429.2694396972656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 428.3342590332031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 371.67034912109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 473.0870056152344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 376.8579406738281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 421.010986328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 369.9779052734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 425.2016296386719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 371.1070556640625
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 0 21.0 1221.34827555 (16.229253414601111, 9)
loss 452.6776428222656
Current State,action,reward,Response time,Next State:  (9, 16.229253414601111) 3 20.0 1272.63886489 (16.295120821876548, 10)
loss 399.353515625
Current State,action,reward,Response time,Next State:  (10, 16.295120821876548) 3 19.0 1242.02029803 (16.667936385136993, 11)
loss 378.9844970703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 400.10260009765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 431.4649963378906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 375.54034423828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 374.8782043457031
############ Running episode number: 137  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 380.0171203613281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 378.4652099609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 399.2762145996094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 378.1923828125
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 1 20.0 974.154538113 (11.336751742492702, 10)
loss 405.9580383300781
Current State,action,reward,Response time,Next State:  (10, 11.336751742492702) 3 19.0 979.00811241 (11.25610796929319, 11)
loss 383.6153564453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 371.6929931640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 430.4519348144531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 404.62115478515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 378.82464599609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 380.8692932128906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 427.8620910644531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 379.8240661621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 375.21881103515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 423.2826232910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 460.684814453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 374.7313232421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 409.11370849609375
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 4 19.0 922.369964659 (10.448897752470936, 11)
loss 402.4950256347656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 373.3531188964844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 370.1129455566406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 370.39105224609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 375.5319519042969
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 4 19.0 919.104778396 (10.388469398680568, 11)
loss 374.9731140136719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 422.6911315917969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 371.2234802246094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 374.57220458984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 427.5643615722656
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 4 19.0 911.223233653 (10.268274366284802, 11)
loss 370.3622131347656
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 4 19.0 910.69972028 (10.335411397720526, 11)
loss 379.9898986816406
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 2 19.0 914.247384359 (10.305649118067803, 11)
loss 373.2551574707031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 432.3289489746094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 369.1999816894531
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 2 19.0 911.133954163 (10.236991269871366, 11)
loss 366.4048156738281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 377.0335693359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 372.8621520996094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 379.9563293457031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 463.657958984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 434.2362365722656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 372.52423095703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 377.1121826171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 402.4029235839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 422.0330505371094
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 1 20.0 929.522052234 (10.771376986314287, 10)
loss 374.4776916503906
Current State,action,reward,Response time,Next State:  (10, 10.771376986314287) 3 19.0 949.018321829 (10.924797168745895, 11)
loss 375.0196228027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 364.9800720214844
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 1 20.0 951.466016946 (11.271571944085663, 10)
loss 372.2533264160156
Current State,action,reward,Response time,Next State:  (10, 11.271571944085663) 3 19.0 975.550709187 (11.670334358779868, 11)
loss 378.6053161621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 369.576416015625
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 1 20.0 992.681522335 (12.19918626616789, 10)
loss 379.59466552734375
Current State,action,reward,Response time,Next State:  (10, 12.19918626616789) 3 19.0 1024.75516863 (12.501496275411796, 11)
loss 372.5728454589844
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 2 19.0 1028.70793389 (13.168618569876575, 11)
loss 452.321044921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 427.38409423828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 373.9717102050781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 374.3106689453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 428.5253601074219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 379.89593505859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 379.84124755859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 425.1795349121094
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 2 19.0 1259.63387034 (17.534967586021782, 11)
loss 373.7178649902344
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 0 21.0 1294.6873044 (17.669285735563751, 9)
loss 429.3433837890625
Current State,action,reward,Response time,Next State:  (9, 17.669285735563751) 4 19.0 1348.01745033 (17.944480812078613, 11)
loss 373.6246643066406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 373.1152038574219
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 1 20.0 1339.64749699 (18.671267839956315, 10)
loss 438.8815612792969
Current State,action,reward,Response time,Next State:  (10, 18.671267839956315) 3 19.0 1368.06085906 (19.02839494033929, 11)
loss 377.32659912109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 429.9477233886719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 400.478759765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 425.21044921875
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 0 21.0 1383.38281107 (19.140765783401285, 9)
loss 405.5182800292969
Current State,action,reward,Response time,Next State:  (9, 19.140765783401285) 2 21.0 1425.04216908 (19.385636054792762, 9)
loss 367.8531799316406
Current State,action,reward,Response time,Next State:  (9, 19.385636054792762) 3 20.0 1437.85991935 (19.223969507401588, 10)
loss 375.75958251953125
Current State,action,reward,Response time,Next State:  (10, 19.223969507401588) 3 19.0 1397.37841716 (19.25591252280865, 11)
loss 386.0137634277344
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 0 21.0 1385.62570908 (19.08360399753829, 9)
loss 370.0734558105469
Current State,action,reward,Response time,Next State:  (9, 19.08360399753829) 3 20.0 1422.05003169 (18.668181536495972, 10)
loss 367.0069274902344
Current State,action,reward,Response time,Next State:  (10, 18.668181536495972) 3 19.0 1367.89714889 (18.375894992990247, 11)
loss 372.3516845703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 372.45477294921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 378.1460266113281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 428.1179504394531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 401.71209716796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 370.4523620605469
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 4 19.0 1210.97093797 (15.828704162850809, 11)
loss 466.84344482421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 366.577392578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 375.0937805175781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 370.1019592285156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 373.7716064453125
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 4 19.0 1203.91462651 (15.829956988360925, 11)
loss 426.8009033203125
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 4 19.0 1204.59090422 (15.892373986997768, 11)
loss 370.9541320800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 374.2220153808594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 378.3446350097656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 453.7115173339844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 433.09417724609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 376.8194885253906
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 1 20.0 1219.63501821 (16.147078378791146, 10)
loss 374.4637451171875
Current State,action,reward,Response time,Next State:  (10, 16.147078378791146) 3 19.0 1234.16752106 (16.229253414601111, 11)
loss 373.8629455566406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 425.1293640136719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 374.05487060546875
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 4 19.0 1248.87152463 (16.836383524612351, 11)
loss 378.3219299316406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 383.8305969238281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 370.43865966796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 376.22967529296875
############ Running episode number: 138  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 374.2169189453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 371.41326904296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 428.25640869140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 424.4563903808594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 401.2237548828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 402.5077209472656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 370.82684326171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 424.4570007324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 378.0183410644531
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 2 19.0 945.729803224 (10.816918347608043, 11)
loss 375.8354187011719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 378.0664367675781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 375.087158203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 377.7604064941406
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 0 21.0 937.318160694 (10.644925616761762, 9)
loss 370.4584655761719
Current State,action,reward,Response time,Next State:  (9, 10.644925616761762) 3 20.0 980.32686333 (10.58735855349979, 10)
loss 372.12164306640625
Current State,action,reward,Response time,Next State:  (10, 10.58735855349979) 3 19.0 939.257231149 (10.552868829802469, 11)
loss 419.3609619140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 376.27685546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 400.97052001953125
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 2 19.0 922.369964659 (10.448897752470936, 11)
loss 382.494873046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 381.22662353515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 372.6705627441406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 418.82720947265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 367.22442626953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 367.3341369628906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 367.4687194824219
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 2 19.0 914.701547126 (10.319026962956018, 11)
loss 371.6361389160156
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 0 21.0 913.381595845 (10.30224719189987, 9)
loss 424.9767150878906
Current State,action,reward,Response time,Next State:  (9, 10.30224719189987) 3 20.0 962.389338906 (10.278181486298042, 10)
loss 369.546142578125
Current State,action,reward,Response time,Next State:  (10, 10.278181486298042) 1 21.0 922.857214352 (10.268274366284802, 9)
loss 373.4720153808594
Current State,action,reward,Response time,Next State:  (9, 10.268274366284802) 1 22.0 960.611029141 (10.335411397720526, 8)
loss 384.7225341796875
Current State,action,reward,Response time,Next State:  (8, 10.335411397720526) 3 21.0 975.09448038 (10.305649118067803, 9)
loss 378.0028076171875
Current State,action,reward,Response time,Next State:  (9, 10.305649118067803) 3 20.0 962.567412952 (10.24826025489064, 10)
loss 405.4197692871094
Current State,action,reward,Response time,Next State:  (10, 10.24826025489064) 3 19.0 921.2700698 (10.276491935146446, 11)
loss 373.8004150390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 481.392578125
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 4 19.0 909.046654676 (10.236272697871373, 11)
loss 382.3392639160156
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 0 21.0 909.008683798 (10.369891240151098, 9)
loss 376.59954833984375
Current State,action,reward,Response time,Next State:  (9, 10.369891240151098) 3 20.0 965.930171009 (10.316955310454549, 10)
loss 373.2006530761719
Action +2 not possible so Scaled up by 1
Current State,action,reward,Response time,Next State:  (10, 10.316955310454549) 4 19.0 924.913936648 (10.333617326102203, 11)
loss 377.87628173828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 407.40911865234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 375.02825927734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 401.39678955078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 373.3633117675781
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 2 19.0 931.154858096 (10.624473674922116, 11)
loss 373.32684326171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 369.6573791503906
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 1 20.0 937.284736847 (10.924797168745895, 10)
loss 374.2533874511719
Current State,action,reward,Response time,Next State:  (10, 10.924797168745895) 3 19.0 957.1563561 (11.039747673816453, 11)
loss 380.140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 373.70489501953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 375.44329833984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 371.72528076171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 371.0283203125
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 4 19.0 1012.73322757 (12.501496275411796, 11)
loss 370.5433044433594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 382.2032470703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 373.8475341796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 369.2543640136719
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 2 19.0 1122.88439768 (14.677479537099185, 11)
loss 376.10980224609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 424.2346496582031
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 0 21.0 1179.43847566 (15.836943704090487, 9)
loss 374.3745422363281
Current State,action,reward,Response time,Next State:  (9, 15.836943704090487) 3 20.0 1252.10338759 (16.466876895473597, 10)
loss 385.19842529296875
Current State,action,reward,Response time,Next State:  (10, 16.466876895473597) 0 22.0 1251.130943 (16.871606159345866, 8)
loss 381.4532165527344
Current State,action,reward,Response time,Next State:  (8, 16.871606159345866) 3 21.0 1357.1010433 (17.534967586021782, 9)
loss 436.5854187011719
Current State,action,reward,Response time,Next State:  (9, 17.534967586021782) 3 20.0 1340.98655806 (17.669285735563751, 10)
loss 375.3009033203125
Current State,action,reward,Response time,Next State:  (10, 17.669285735563751) 3 19.0 1314.91162813 (17.944480812078613, 11)
loss 374.0856018066406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 377.2578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 372.3726501464844
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 4 19.0 1354.73183582 (19.02839494033929, 11)
loss 370.87139892578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 426.3899841308594
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 0 21.0 1387.23260634 (19.340464848017284, 9)
loss 377.75250244140625
Current State,action,reward,Response time,Next State:  (9, 19.340464848017284) 3 20.0 1435.4954296 (19.213467265587269, 10)
loss 376.1986083984375
Current State,action,reward,Response time,Next State:  (10, 19.213467265587269) 3 19.0 1396.82133527 (19.140765783401285, 11)
loss 363.47430419921875
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 0 21.0 1379.54110953 (19.385636054792762, 9)
loss 370.7662048339844
Current State,action,reward,Response time,Next State:  (9, 19.385636054792762) 3 20.0 1437.85991935 (19.223969507401588, 10)
loss 372.88323974609375
Action +2 not possible so Scaled up by 1
Current State,action,reward,Response time,Next State:  (10, 19.223969507401588) 4 19.0 1397.37841716 (19.25591252280865, 11)
loss 375.97857666015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 379.145751953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 429.0044250488281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 377.17138671875
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 1 20.0 1339.12370397 (17.82724819986867, 10)
loss 370.9910888671875
Current State,action,reward,Response time,Next State:  (10, 17.82724819986867) 3 19.0 1323.29060362 (17.229782241685768, 11)
loss 373.5189208984375
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 1 20.0 1278.56065924 (16.84211602880065, 10)
loss 380.3367004394531
Current State,action,reward,Response time,Next State:  (10, 16.84211602880065) 3 19.0 1271.03516211 (16.237094554670044, 11)
loss 403.7127380371094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 434.3241882324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 376.89385986328125
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 0 21.0 1204.52470225 (15.550833128512703, 9)
loss 370.0737609863281
Current State,action,reward,Response time,Next State:  (9, 15.550833128512703) 3 20.0 1237.12691092 (15.446694946204717, 10)
loss 399.21173095703125
Current State,action,reward,Response time,Next State:  (10, 15.446694946204717) 0 22.0 1197.01631782 (15.750501603468638, 8)
loss 373.80010986328125
Current State,action,reward,Response time,Next State:  (8, 15.750501603468638) 3 21.0 1291.57831736 (15.817158911312735, 9)
loss 369.4660949707031
Current State,action,reward,Response time,Next State:  (9, 15.817158911312735) 0 23.0 1251.06775133 (15.829956988360925, 7)
loss 376.1514587402344
Current State,action,reward,Response time,Next State:  (7, 15.829956988360925) 4 21.0 1362.30032139 (15.892373986997768, 9)
loss 378.1913146972656
Current State,action,reward,Response time,Next State:  (9, 15.892373986997768) 3 20.0 1255.00488935 (15.954793861767499, 10)
loss 430.62237548828125
Current State,action,reward,Response time,Next State:  (10, 15.954793861767499) 3 19.0 1223.96796344 (16.004586266677634, 11)
loss 368.0098876953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 378.1618957519531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 376.0171813964844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 368.1461181640625
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 1 20.0 1219.63501821 (16.147078378791146, 10)
loss 407.082275390625
Current State,action,reward,Response time,Next State:  (10, 16.147078378791146) 3 19.0 1234.16752106 (16.229253414601111, 11)
loss 373.2816467285156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 372.8565979003906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 370.6906433105469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 368.5123291015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 377.49029541015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 371.8543395996094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 425.4522399902344
############ Running episode number: 139  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 2 20.0 1029.06659875 (11.973514343585284, 10)
loss 379.94659423828125
Current State,action,reward,Response time,Next State:  (10, 11.973514343585284) 3 19.0 1012.7846064 (11.786394321941378, 11)
loss 427.11492919921875
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 0 21.0 990.920419923 (11.61852219546234, 9)
loss 380.7839050292969
Current State,action,reward,Response time,Next State:  (9, 11.61852219546234) 3 20.0 1031.28983953 (11.469111876584304, 10)
loss 373.0721435546875
Current State,action,reward,Response time,Next State:  (10, 11.469111876584304) 3 19.0 986.02903554 (11.336751742492702, 11)
loss 381.3072509765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 372.2503356933594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 378.7330017089844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 372.49395751953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 423.3719177246094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 425.7080993652344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 379.6529235839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 372.4958190917969
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 1 20.0 937.12351295 (10.772009508959538, 10)
loss 378.1310119628906
Current State,action,reward,Response time,Next State:  (10, 10.772009508959538) 3 19.0 949.051873418 (10.644925616761762, 11)
loss 372.799072265625
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 2 19.0 930.602776506 (10.58735855349979, 11)
loss 366.8551025390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 373.5845031738281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 375.93487548828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 374.2215576171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 373.43341064453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 379.7021179199219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 412.8650817871094
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 1 20.0 919.871906942 (10.370942817486826, 10)
loss 425.9827575683594
Current State,action,reward,Response time,Next State:  (10, 10.370942817486826) 3 19.0 927.777654938 (10.42733414151318, 11)
loss 424.5437927246094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 433.95306396484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 372.5087585449219
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 2 19.0 914.701547126 (10.319026962956018, 11)
loss 427.6015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 369.78228759765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 407.842529296875
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 2 19.0 911.223233653 (10.268274366284802, 11)
loss 373.3678283691406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 412.103515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 370.7573547363281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 367.90032958984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 428.2389221191406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 365.666748046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 482.2127380371094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 374.7450256347656
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 2 19.0 916.069372847 (10.316955310454549, 11)
loss 371.4855041503906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 460.8147277832031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 376.8753662109375
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 4 19.0 917.140709305 (10.425974763084863, 11)
loss 375.1993103027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 430.39404296875
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 1 20.0 925.376677007 (10.655373370049301, 10)
loss 378.3883972167969
Current State,action,reward,Response time,Next State:  (10, 10.655373370049301) 3 19.0 942.865015335 (10.624473674922116, 11)
loss 365.5931701660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 374.05023193359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 372.5093078613281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 374.9799499511719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 377.1402282714844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 371.4718322753906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 375.6858215332031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 406.56396484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 404.0892028808594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 423.5203857421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 406.4603271484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 366.617919921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 406.1990966796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 371.33013916015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 368.4375
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 0 21.0 1204.9600972 (16.466876895473597, 9)
loss 372.21881103515625
Current State,action,reward,Response time,Next State:  (9, 16.466876895473597) 0 23.0 1285.07728144 (16.871606159345866, 7)
loss 371.58740234375
Current State,action,reward,Response time,Next State:  (7, 16.871606159345866) 3 22.0 1427.21249257 (17.534967586021782, 8)
loss 424.9391784667969
Current State,action,reward,Response time,Next State:  (8, 17.534967586021782) 3 21.0 1395.8710659 (17.669285735563751, 9)
loss 370.499267578125
Current State,action,reward,Response time,Next State:  (9, 17.669285735563751) 3 20.0 1348.01745033 (17.944480812078613, 10)
loss 379.62457275390625
Current State,action,reward,Response time,Next State:  (10, 17.944480812078613) 2 20.0 1329.50910109 (18.385807405229915, 10)
loss 378.434326171875
Current State,action,reward,Response time,Next State:  (10, 18.385807405229915) 3 19.0 1352.9188695 (18.671267839956315, 11)
loss 431.7906799316406
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 1 20.0 1354.73183582 (19.02839494033929, 10)
loss 376.7776794433594
Current State,action,reward,Response time,Next State:  (10, 19.02839494033929) 3 19.0 1387.00434183 (19.286321916040979, 11)
loss 380.2562255859375
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 0 21.0 1387.23260634 (19.340464848017284, 9)
loss 367.3998107910156
Current State,action,reward,Response time,Next State:  (9, 19.340464848017284) 3 20.0 1435.4954296 (19.213467265587269, 10)
loss 382.13909912109375
Current State,action,reward,Response time,Next State:  (10, 19.213467265587269) 0 22.0 1396.82133527 (19.140765783401285, 8)
loss 379.8963317871094
Current State,action,reward,Response time,Next State:  (8, 19.140765783401285) 3 21.0 1489.72161235 (19.385636054792762, 9)
loss 378.746826171875
Current State,action,reward,Response time,Next State:  (9, 19.385636054792762) 3 20.0 1437.85991935 (19.223969507401588, 10)
loss 376.1768493652344
Current State,action,reward,Response time,Next State:  (10, 19.223969507401588) 3 19.0 1397.37841716 (19.25591252280865, 11)
loss 430.6651306152344
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 0 21.0 1385.62570908 (19.08360399753829, 9)
loss 435.0401611328125
Current State,action,reward,Response time,Next State:  (9, 19.08360399753829) 4 19.0 1422.05003169 (18.668181536495972, 11)
loss 377.4068298339844
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 1 20.0 1354.56874896 (18.375894992990247, 10)
loss 458.2415466308594
Current State,action,reward,Response time,Next State:  (10, 18.375894992990247) 3 19.0 1352.39307459 (17.82724819986867, 11)
loss 374.9620361328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 376.9035339355469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 368.32366943359375
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 0 21.0 1258.07554888 (16.237094554670044, 9)
loss 371.6911926269531
Current State,action,reward,Response time,Next State:  (9, 16.237094554670044) 3 20.0 1273.04930988 (15.950694610794756, 10)
loss 385.45556640625
Current State,action,reward,Response time,Next State:  (10, 15.950694610794756) 3 19.0 1223.7505224 (15.828704162850809, 11)
loss 406.0635681152344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 380.4676208496094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 432.6586608886719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 373.1003112792969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 407.0966491699219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 370.5982360839844
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 4 19.0 1204.59090422 (15.892373986997768, 11)
loss 375.5048828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 378.87921142578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 373.6351318359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 371.1553039550781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 372.73968505859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 367.7900695800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 371.6300354003906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 366.63592529296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 377.74560546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 376.966552734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 368.539794921875
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 0 21.0 1257.77263112 (16.845818065953559, 9)
loss 367.6128845214844
Current State,action,reward,Response time,Next State:  (9, 16.845818065953559) 4 19.0 1304.91298164 (17.052961248403161, 11)
loss 372.509033203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 370.5215759277344
############ Running episode number: 140  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 372.4584045410156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 368.0229187011719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 375.1212463378906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 371.6348876953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 373.1298828125
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 1 20.0 967.160346038 (11.25610796929319, 10)
loss 372.94732666015625
Current State,action,reward,Response time,Next State:  (10, 11.25610796929319) 3 19.0 974.730436685 (11.027107764209074, 11)
loss 379.2607727050781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 371.0491943359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 370.10577392578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 374.9915771484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 386.5400695800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 376.4392395019531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 372.5900573730469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 370.31488037109375
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 1 20.0 930.602776506 (10.58735855349979, 10)
loss 368.4808349609375
Current State,action,reward,Response time,Next State:  (10, 10.58735855349979) 3 19.0 939.257231149 (10.552868829802469, 11)
loss 366.87762451171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 371.9632263183594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 400.25384521484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 368.7188720703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 366.52337646484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 372.4154968261719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 377.2649841308594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 370.3751220703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 374.0130920410156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 374.43359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 367.22174072265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 377.1310729980469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 370.1153869628906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 374.67138671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 373.10711669921875
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 4 19.0 914.247384359 (10.305649118067803, 11)
loss 373.8595886230469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 370.3775329589844
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 1 20.0 909.642131904 (10.276491935146446, 10)
loss 369.2114562988281
Current State,action,reward,Response time,Next State:  (10, 10.276491935146446) 3 19.0 922.767593645 (10.236991269871366, 11)
loss 380.8470458984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 374.2701416015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 404.4734191894531
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 1 20.0 916.069372847 (10.316955310454549, 10)
loss 375.2576904296875
Current State,action,reward,Response time,Next State:  (10, 10.316955310454549) 3 19.0 924.913936648 (10.333617326102203, 11)
loss 365.4337158203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 371.976806640625
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 4 19.0 917.140709305 (10.425974763084863, 11)
loss 372.5085144042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 373.1455383300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 376.45709228515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 370.6787414550781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 373.1595764160156
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 0 21.0 937.284736847 (10.924797168745895, 9)
loss 364.6675720214844
Current State,action,reward,Response time,Next State:  (9, 10.924797168745895) 3 20.0 994.97675791 (11.039747673816453, 10)
loss 370.17498779296875
Action +2 not possible so Scaled up by 1
Current State,action,reward,Response time,Next State:  (10, 11.039747673816453) 4 19.0 963.253801267 (11.271571944085663, 11)
loss 370.46923828125
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 1 20.0 963.716106332 (11.670334358779868, 10)
loss 401.4917907714844
Current State,action,reward,Response time,Next State:  (10, 11.670334358779868) 3 19.0 996.702699398 (11.819721938468785, 11)
loss 368.65460205078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 370.8020935058594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 364.6444091796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 376.25140380859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 369.336669921875
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 1 20.0 1089.37925646 (14.283719188889453, 10)
loss 375.64324951171875
Current State,action,reward,Response time,Next State:  (10, 14.283719188889453) 3 19.0 1135.32732476 (14.677479537099185, 11)
loss 367.4635314941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 375.2767333984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 370.76763916015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 372.0086669921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 376.5813903808594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 369.6934814453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 370.8422546386719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 370.4316101074219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 398.1859130859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 373.3245849609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 370.6940612792969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 397.6847229003906
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 0 21.0 1387.23260634 (19.340464848017284, 9)
loss 371.9942626953125
Current State,action,reward,Response time,Next State:  (9, 19.340464848017284) 3 20.0 1435.4954296 (19.213467265587269, 10)
loss 374.67822265625
Current State,action,reward,Response time,Next State:  (10, 19.213467265587269) 0 22.0 1396.82133527 (19.140765783401285, 8)
loss 377.88311767578125
Current State,action,reward,Response time,Next State:  (8, 19.140765783401285) 3 21.0 1489.72161235 (19.385636054792762, 9)
loss 371.6833190917969
Current State,action,reward,Response time,Next State:  (9, 19.385636054792762) 3 20.0 1437.85991935 (19.223969507401588, 10)
loss 373.5743103027344
Current State,action,reward,Response time,Next State:  (10, 19.223969507401588) 0 22.0 1397.37841716 (19.25591252280865, 8)
loss 370.30535888671875
Current State,action,reward,Response time,Next State:  (8, 19.25591252280865) 3 21.0 1496.45133993 (19.08360399753829, 9)
loss 371.47760009765625
Current State,action,reward,Response time,Next State:  (9, 19.08360399753829) 3 20.0 1422.05003169 (18.668181536495972, 10)
loss 370.5185546875
Current State,action,reward,Response time,Next State:  (10, 18.668181536495972) 3 19.0 1367.89714889 (18.375894992990247, 11)
loss 372.8997802734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 368.25201416015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 372.5267333984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 372.7193298339844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 367.7149353027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 376.97796630859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 377.84381103515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 365.753662109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 401.2345886230469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 367.666015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 406.6883544921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 373.8633728027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 372.7804870605469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 374.5986633300781
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 2 19.0 1211.18755114 (16.004586266677634, 11)
loss 368.4082336425781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 370.6375732421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 368.2674865722656
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 4 19.0 1210.80462626 (16.11465619633363, 11)
loss 368.8075256347656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 395.733154296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 369.0353698730469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 367.9264221191406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 374.751708984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 376.4931945800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 371.7922668457031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 372.3590393066406
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 2 19.0 1269.21706044 (17.215992726625572, 11)
loss 369.89093017578125
############ Running episode number: 141  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 368.9500427246094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 367.2899475097656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 371.5742492675781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 366.83392333984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 406.5855712890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 400.130859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 367.35797119140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 370.2289123535156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 368.9862365722656
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 0 21.0 945.729803224 (10.816918347608043, 9)
loss 377.28704833984375
Current State,action,reward,Response time,Next State:  (9, 10.816918347608043) 3 20.0 989.329834005 (10.819208572963639, 10)
loss 369.7572937011719
Current State,action,reward,Response time,Next State:  (10, 10.819208572963639) 3 19.0 951.555504911 (10.768325938188134, 11)
loss 368.1404724121094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 370.0679931640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 369.8399353027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 370.4964904785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 405.1309814453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 401.25262451171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 377.2933349609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 369.6064147949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 369.7594299316406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 366.6661376953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 369.51470947265625
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 2 19.0 916.124940439 (10.42733414151318, 11)
loss 374.1788635253906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 402.0421142578125
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 2 19.0 917.051082408 (10.344006106602812, 11)
loss 362.1315612792969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 369.1329650878906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 368.4090881347656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 374.047119140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 375.1148986816406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 368.70013427734375
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 2 19.0 914.247384359 (10.305649118067803, 11)
loss 376.58233642578125
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 1 20.0 912.67468196 (10.24826025489064, 10)
loss 376.0615539550781
Current State,action,reward,Response time,Next State:  (10, 10.24826025489064) 2 20.0 921.2700698 (10.276491935146446, 10)
loss 367.89031982421875
Current State,action,reward,Response time,Next State:  (10, 10.276491935146446) 3 19.0 922.767593645 (10.236991269871366, 11)
loss 374.62725830078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 368.3519287109375
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 0 21.0 909.008683798 (10.369891240151098, 9)
loss 373.4944152832031
Current State,action,reward,Response time,Next State:  (9, 10.369891240151098) 3 20.0 965.930171009 (10.316955310454549, 10)
loss 368.75653076171875
Current State,action,reward,Response time,Next State:  (10, 10.316955310454549) 3 19.0 924.913936648 (10.333617326102203, 11)
loss 365.32879638671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 368.43133544921875
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 1 20.0 917.140709305 (10.425974763084863, 10)
loss 363.5011291503906
Current State,action,reward,Response time,Next State:  (10, 10.425974763084863) 3 19.0 930.696774523 (10.546025383098053, 11)
loss 368.2740478515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 373.0613098144531
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 1 20.0 931.154858096 (10.624473674922116, 10)
loss 371.5988464355469
Current State,action,reward,Response time,Next State:  (10, 10.624473674922116) 3 19.0 941.225969064 (10.771376986314287, 11)
loss 372.1943664550781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 369.9850769042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 369.3611145019531
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 1 20.0 951.466016946 (11.271571944085663, 10)
loss 372.562255859375
Current State,action,reward,Response time,Next State:  (10, 11.271571944085663) 3 19.0 975.550709187 (11.670334358779868, 11)
loss 372.72259521484375
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 1 20.0 984.787563682 (11.819721938468785, 10)
loss 370.2341613769531
Action +2 not possible so Scaled up by 1
Current State,action,reward,Response time,Next State:  (10, 11.819721938468785) 4 19.0 1004.62682792 (12.19918626616789, 11)
loss 365.10369873046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 367.2095947265625
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 2 19.0 1028.70793389 (13.168618569876575, 11)
loss 373.10357666015625
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 4 19.0 1063.96010023 (13.649658108197247, 11)
loss 367.4363098144531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 368.1462707519531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 370.8880615234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 368.10186767578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 369.31060791015625
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 0 21.0 1204.9600972 (16.466876895473597, 9)
loss 370.3648376464844
Current State,action,reward,Response time,Next State:  (9, 16.466876895473597) 3 20.0 1285.07728144 (16.871606159345866, 10)
loss 369.1216735839844
Current State,action,reward,Response time,Next State:  (10, 16.871606159345866) 3 19.0 1272.5994393 (17.534967586021782, 11)
loss 369.946533203125
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 2 19.0 1294.6873044 (17.669285735563751, 11)
loss 365.11297607421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 369.3558349609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 369.47369384765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 369.037353515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 367.19964599609375
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 1 20.0 1373.60319427 (19.286321916040979, 10)
loss 369.59869384765625
Current State,action,reward,Response time,Next State:  (10, 19.286321916040979) 3 19.0 1400.68584406 (19.340464848017284, 11)
loss 372.1565856933594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 366.07281494140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 367.1387939453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 366.86053466796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 365.0990905761719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 371.43670654296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 366.33056640625
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 1 20.0 1376.52055872 (18.668181536495972, 10)
loss 369.65533447265625
Action +2 not possible so Scaled up by 1
Current State,action,reward,Response time,Next State:  (10, 18.668181536495972) 4 19.0 1367.89714889 (18.375894992990247, 11)
loss 367.72406005859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 369.637939453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 370.5675964355469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 369.3291320800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 369.8382568359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 366.4765319824219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 367.1571960449219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 368.0516357421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 370.5560302734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 370.0907897949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 367.9651184082031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 368.7919921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 370.107177734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 372.8880615234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 364.8115234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 372.1675109863281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 366.7698974609375
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 2 19.0 1210.80462626 (16.11465619633363, 11)
loss 369.6893310546875
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 4 19.0 1219.63501821 (16.147078378791146, 11)
loss 374.47564697265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 367.2125244140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 366.1866149902344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 372.8150634765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 368.5823669433594
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 2 19.0 1257.77263112 (16.845818065953559, 11)
loss 369.4900207519531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 366.7132873535156
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 1 20.0 1269.21706044 (17.215992726625572, 10)
loss 371.37811279296875
############ Running episode number: 142  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 367.631103515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 368.7184143066406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 369.388427734375
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 1 20.0 982.049698353 (11.469111876584304, 10)
loss 367.5186462402344
Current State,action,reward,Response time,Next State:  (10, 11.469111876584304) 3 19.0 986.02903554 (11.336751742492702, 11)
loss 367.4739990234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 370.4365539550781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 370.98333740234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 369.77001953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 367.5621032714844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 369.553955078125
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 4 19.0 939.691239608 (10.819208572963639, 11)
loss 367.2083740234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 366.4150390625
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 1 20.0 937.12351295 (10.772009508959538, 10)
loss 365.3906555175781
Action +2 not possible so Scaled up by 1
Current State,action,reward,Response time,Next State:  (10, 10.772009508959538) 4 19.0 949.051873418 (10.644925616761762, 11)
loss 363.6990661621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 372.3189392089844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 368.085693359375
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 4 19.0 925.738299342 (10.553846649940214, 11)
loss 366.71044921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 369.5390319824219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 367.5900573730469
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 4 19.0 920.244245637 (10.433149880183072, 11)
loss 366.56304931640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 370.48516845703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 364.44744873046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 369.1639709472656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 367.1304626464844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 366.8136901855469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 365.7033386230469
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 0 21.0 913.381595845 (10.30224719189987, 9)
loss 366.29376220703125
Current State,action,reward,Response time,Next State:  (9, 10.30224719189987) 1 22.0 962.389338906 (10.278181486298042, 8)
loss 366.53594970703125
Current State,action,reward,Response time,Next State:  (8, 10.278181486298042) 3 21.0 971.749689939 (10.268274366284802, 9)
loss 365.9775085449219
Current State,action,reward,Response time,Next State:  (9, 10.268274366284802) 3 20.0 960.611029141 (10.335411397720526, 10)
loss 365.5279846191406
Current State,action,reward,Response time,Next State:  (10, 10.335411397720526) 3 19.0 925.892923039 (10.305649118067803, 11)
loss 366.7088623046875
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 1 20.0 912.67468196 (10.24826025489064, 10)
loss 366.93145751953125
Current State,action,reward,Response time,Next State:  (10, 10.24826025489064) 2 20.0 921.2700698 (10.276491935146446, 10)
loss 368.40167236328125
Current State,action,reward,Response time,Next State:  (10, 10.276491935146446) 3 19.0 922.767593645 (10.236991269871366, 11)
loss 369.6589660644531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 366.478271484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 375.006103515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 367.9793395996094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 366.37567138671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 369.1587829589844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 367.1177062988281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 376.07989501953125
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 1 20.0 925.376677007 (10.655373370049301, 10)
loss 369.23583984375
Current State,action,reward,Response time,Next State:  (10, 10.655373370049301) 0 22.0 942.865015335 (10.624473674922116, 8)
loss 368.9138488769531
Current State,action,reward,Response time,Next State:  (8, 10.624473674922116) 3 21.0 991.988665914 (10.771376986314287, 9)
loss 367.62359619140625
Current State,action,reward,Response time,Next State:  (9, 10.771376986314287) 1 22.0 986.945968488 (10.924797168745895, 8)
loss 368.5838928222656
Current State,action,reward,Response time,Next State:  (8, 10.924797168745895) 3 21.0 1009.54101094 (11.039747673816453, 9)
loss 367.96478271484375
Current State,action,reward,Response time,Next State:  (9, 11.039747673816453) 3 20.0 1000.99384957 (11.271571944085663, 10)
loss 366.9669189453125
Current State,action,reward,Response time,Next State:  (10, 11.271571944085663) 1 21.0 975.550709187 (11.670334358779868, 9)
loss 369.5286865234375
Current State,action,reward,Response time,Next State:  (9, 11.670334358779868) 3 20.0 1034.00195058 (11.819721938468785, 10)
loss 369.229248046875
Current State,action,reward,Response time,Next State:  (10, 11.819721938468785) 3 19.0 1004.62682792 (12.19918626616789, 11)
loss 372.4996337890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 369.2042541503906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 370.94696044921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 368.35113525390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 367.7597961425781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 367.8859558105469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 363.8791809082031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 366.34100341796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 368.7791442871094
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 1 20.0 1238.24711194 (16.871606159345866, 10)
loss 368.1788024902344
Current State,action,reward,Response time,Next State:  (10, 16.871606159345866) 3 19.0 1272.5994393 (17.534967586021782, 11)
loss 367.0358581542969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 368.3470153808594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 368.8255310058594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 372.914306640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 367.4963684082031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 366.4827575683594
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 2 19.0 1373.60319427 (19.286321916040979, 11)
loss 369.1086730957031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 365.594482421875
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 1 20.0 1390.09363446 (19.213467265587269, 10)
loss 367.9547424316406
Current State,action,reward,Response time,Next State:  (10, 19.213467265587269) 3 19.0 1396.82133527 (19.140765783401285, 11)
loss 370.5741271972656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 367.77484130859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 371.99285888671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 364.9098205566406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 372.7311706542969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 371.3731384277344
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 0 21.0 1354.56874896 (18.375894992990247, 9)
loss 369.7497253417969
Current State,action,reward,Response time,Next State:  (9, 18.375894992990247) 3 20.0 1385.00495784 (17.82724819986867, 10)
loss 368.1777648925781
Current State,action,reward,Response time,Next State:  (10, 17.82724819986867) 3 19.0 1323.29060362 (17.229782241685768, 11)
loss 366.5693054199219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 369.2628479003906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 368.61651611328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 368.22808837890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 365.1041259765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 365.57904052734375
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 1 20.0 1189.84140354 (15.446694946204717, 10)
loss 365.1620178222656
Current State,action,reward,Response time,Next State:  (10, 15.446694946204717) 3 19.0 1197.01631782 (15.750501603468638, 11)
loss 372.2873840332031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 364.7428894042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 368.2224426269531
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 2 19.0 1204.59090422 (15.892373986997768, 11)
loss 370.6184387207031
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 2 19.0 1207.88915169 (15.954793861767499, 11)
loss 369.6593933105469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 365.13238525390625
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 1 20.0 1213.81868812 (16.017694914042416, 10)
loss 370.8475036621094
Current State,action,reward,Response time,Next State:  (10, 16.017694914042416) 3 19.0 1227.30449265 (15.947547279389703, 11)
loss 366.7720947265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 372.822509765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 367.22509765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 372.173828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 368.37249755859375
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 2 19.0 1229.17115431 (16.667936385136993, 11)
loss 368.3014221191406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 369.8028259277344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 373.5622863769531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 372.0950012207031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 366.1120300292969
############ Running episode number: 143  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 2 20.0 1029.06659875 (11.973514343585284, 10)
loss 370.1622009277344
Current State,action,reward,Response time,Next State:  (10, 11.973514343585284) 3 19.0 1012.7846064 (11.786394321941378, 11)
loss 372.4375
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 0 21.0 990.920419923 (11.61852219546234, 9)
loss 367.3926696777344
Current State,action,reward,Response time,Next State:  (9, 11.61852219546234) 3 20.0 1031.28983953 (11.469111876584304, 10)
loss 365.187744140625
Current State,action,reward,Response time,Next State:  (10, 11.469111876584304) 3 19.0 986.02903554 (11.336751742492702, 11)
loss 368.76690673828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 367.4307861328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 367.7420654296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 364.6728820800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 364.3222351074219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 370.6923522949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 365.66571044921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 367.91943359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 369.4089660644531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 368.7765808105469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 368.6351623535156
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 2 19.0 927.560809977 (10.552868829802469, 11)
loss 368.16644287109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 371.92083740234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 371.435546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 372.50262451171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 368.00244140625
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 4 19.0 919.412094444 (10.44185150623065, 11)
loss 371.10333251953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 367.12774658203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 367.5775146484375
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 0 21.0 919.104778396 (10.388469398680568, 9)
loss 370.6401672363281
Current State,action,reward,Response time,Next State:  (9, 10.388469398680568) 2 21.0 966.902645924 (10.344006106602812, 9)
loss 367.423583984375
Current State,action,reward,Response time,Next State:  (9, 10.344006106602812) 3 20.0 964.575212011 (10.319026962956018, 10)
loss 375.86676025390625
Current State,action,reward,Response time,Next State:  (10, 10.319026962956018) 3 19.0 925.023825574 (10.30224719189987, 11)
loss 369.2880554199219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 377.9042663574219
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 2 19.0 911.223233653 (10.268274366284802, 11)
loss 369.1748962402344
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 0 21.0 910.69972028 (10.335411397720526, 9)
loss 373.2539367675781
Current State,action,reward,Response time,Next State:  (9, 10.335411397720526) 3 20.0 964.125321415 (10.305649118067803, 10)
loss 373.4798278808594
Current State,action,reward,Response time,Next State:  (10, 10.305649118067803) 3 19.0 924.314209939 (10.24826025489064, 11)
loss 367.67401123046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 368.0241394042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 371.7496337890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 366.4349060058594
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 2 19.0 909.008683798 (10.369891240151098, 11)
loss 368.77728271484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 374.94580078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 368.0630798339844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 374.5343322753906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 369.39251708984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 372.527099609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 368.2799072265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 372.6897888183594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 366.7098693847656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 369.0072326660156
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 1 20.0 945.391786838 (11.039747673816453, 10)
loss 369.8040771484375
Current State,action,reward,Response time,Next State:  (10, 11.039747673816453) 3 19.0 963.253801267 (11.271571944085663, 11)
loss 368.5464782714844
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 0 21.0 963.716106332 (11.670334358779868, 9)
loss 370.28167724609375
Current State,action,reward,Response time,Next State:  (9, 11.670334358779868) 3 20.0 1034.00195058 (11.819721938468785, 10)
loss 369.95977783203125
Current State,action,reward,Response time,Next State:  (10, 11.819721938468785) 3 19.0 1004.62682792 (12.19918626616789, 11)
loss 370.0703430175781
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 1 20.0 1012.73322757 (12.501496275411796, 10)
loss 365.02008056640625
Current State,action,reward,Response time,Next State:  (10, 12.501496275411796) 3 19.0 1040.79092857 (13.168618569876575, 11)
loss 367.5628967285156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 365.5125732421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 371.7569580078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 364.7926940917969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 366.43603515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 369.08319091796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 364.0130615234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 369.4068908691406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 364.940185546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 373.1932067871094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 368.04656982421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 365.269287109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 369.5458068847656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 367.309814453125
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 0 21.0 1373.60319427 (19.286321916040979, 9)
loss 365.18511962890625
Current State,action,reward,Response time,Next State:  (9, 19.286321916040979) 3 20.0 1432.66131431 (19.340464848017284, 10)
loss 368.7555236816406
Current State,action,reward,Response time,Next State:  (10, 19.340464848017284) 3 19.0 1403.55780672 (19.213467265587269, 11)
loss 369.0243835449219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 368.8013610839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 368.1842041015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 373.93682861328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 369.0612487792969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 372.1224365234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 370.8105163574219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 369.0269775390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 365.36114501953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 367.9021911621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 369.8705139160156
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 2 19.0 1258.07554888 (16.237094554670044, 11)
loss 367.5539245605469
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 1 20.0 1226.10492247 (15.950694610794756, 10)
loss 370.3041076660156
Current State,action,reward,Response time,Next State:  (10, 15.950694610794756) 3 19.0 1223.7505224 (15.828704162850809, 11)
loss 368.1199035644531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 365.5793151855469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 367.0436706542969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 366.7224426269531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 371.2425537109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 372.2667236328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 368.59295654296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 366.69464111328125
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 2 19.0 1211.18755114 (16.004586266677634, 11)
loss 370.2594909667969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 367.2088928222656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 369.7451477050781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 368.9378967285156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 364.64910888671875
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 0 21.0 1221.34827555 (16.229253414601111, 9)
loss 368.0229187011719
Current State,action,reward,Response time,Next State:  (9, 16.229253414601111) 3 20.0 1272.63886489 (16.295120821876548, 10)
loss 369.1028137207031
Current State,action,reward,Response time,Next State:  (10, 16.295120821876548) 3 19.0 1242.02029803 (16.667936385136993, 11)
loss 369.8993225097656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 366.7284851074219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 366.9065856933594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 373.1194152832031
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 2 19.0 1269.21706044 (17.215992726625572, 11)
loss 372.2767333984375
############ Running episode number: 144  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 364.31396484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 368.37591552734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 369.2024841308594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 371.3750915527344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 365.76507568359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 368.1260986328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 372.41473388671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 367.8365173339844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 363.7037353515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 366.28704833984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 364.9901123046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 368.5513610839844
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 1 20.0 937.12351295 (10.772009508959538, 10)
loss 365.85748291015625
Current State,action,reward,Response time,Next State:  (10, 10.772009508959538) 3 19.0 949.051873418 (10.644925616761762, 11)
loss 370.0220031738281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 370.635986328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 367.1622009277344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 369.3986511230469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 372.69366455078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 367.8695373535156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 369.2417297363281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 368.24029541015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 369.7489929199219
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 0 21.0 916.124940439 (10.42733414151318, 9)
loss 368.0616455078125
Current State,action,reward,Response time,Next State:  (9, 10.42733414151318) 3 20.0 968.937023414 (10.388469398680568, 10)
loss 368.04498291015625
Current State,action,reward,Response time,Next State:  (10, 10.388469398680568) 3 19.0 928.707336523 (10.344006106602812, 11)
loss 371.7434997558594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 367.00341796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 366.9767761230469
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 1 20.0 912.494916918 (10.278181486298042, 10)
loss 368.90496826171875
Current State,action,reward,Response time,Next State:  (10, 10.278181486298042) 3 19.0 922.857214352 (10.268274366284802, 11)
loss 369.2681579589844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 366.8113708496094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 367.3676452636719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 369.8554992675781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 368.9687194824219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 374.1776123046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 368.0131530761719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 370.5401916503906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 370.653076171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 370.2577819824219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 367.3223571777344
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 1 20.0 917.140709305 (10.425974763084863, 10)
loss 368.7248229980469
Current State,action,reward,Response time,Next State:  (10, 10.425974763084863) 3 19.0 930.696774523 (10.546025383098053, 11)
loss 366.5982971191406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 367.8935852050781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 368.72540283203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 366.3431091308594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 368.96185302734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 369.5096435546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 367.32879638671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 373.3666687011719
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 0 21.0 984.787563682 (11.819721938468785, 9)
loss 365.7834777832031
Current State,action,reward,Response time,Next State:  (9, 11.819721938468785) 3 20.0 1041.82165315 (12.19918626616789, 10)
loss 372.15936279296875
Current State,action,reward,Response time,Next State:  (10, 12.19918626616789) 3 19.0 1024.75516863 (12.501496275411796, 11)
loss 374.68701171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 367.2047119140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 372.8202209472656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 369.1019287109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 367.1873474121094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 369.4591369628906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 368.9842529296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 363.73944091796875
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 4 19.0 1238.24711194 (16.871606159345866, 11)
loss 366.7467956542969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 370.61627197265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 366.0382995605469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 366.1673889160156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 368.8006286621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 366.24591064453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 370.49542236328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 366.7406005859375
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 0 21.0 1387.23260634 (19.340464848017284, 9)
loss 368.8416442871094
Current State,action,reward,Response time,Next State:  (9, 19.340464848017284) 3 20.0 1435.4954296 (19.213467265587269, 10)
loss 367.8062438964844
Current State,action,reward,Response time,Next State:  (10, 19.213467265587269) 0 22.0 1396.82133527 (19.140765783401285, 8)
loss 370.8301696777344
Current State,action,reward,Response time,Next State:  (8, 19.140765783401285) 3 21.0 1489.72161235 (19.385636054792762, 9)
loss 366.5269775390625
Current State,action,reward,Response time,Next State:  (9, 19.385636054792762) 3 20.0 1437.85991935 (19.223969507401588, 10)
loss 363.4156188964844
Current State,action,reward,Response time,Next State:  (10, 19.223969507401588) 3 19.0 1397.37841716 (19.25591252280865, 11)
loss 369.84375
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 2 19.0 1385.62570908 (19.08360399753829, 11)
loss 372.7784118652344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 374.17108154296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 369.89996337890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 367.1827392578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 369.6971740722656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 364.5220642089844
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 0 21.0 1258.07554888 (16.237094554670044, 9)
loss 368.9938659667969
Current State,action,reward,Response time,Next State:  (9, 16.237094554670044) 3 20.0 1273.04930988 (15.950694610794756, 10)
loss 375.37109375
Current State,action,reward,Response time,Next State:  (10, 15.950694610794756) 3 19.0 1223.7505224 (15.828704162850809, 11)
loss 378.7830810546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 368.9677429199219
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 2 19.0 1189.84140354 (15.446694946204717, 11)
loss 366.459228515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 367.493896484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 367.2414855957031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 371.3870544433594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 369.8955993652344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 369.1079406738281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 373.01983642578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 371.64263916015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 365.809326171875
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 0 21.0 1210.80462626 (16.11465619633363, 9)
loss 365.6985168457031
Current State,action,reward,Response time,Next State:  (9, 16.11465619633363) 3 20.0 1266.64026605 (16.147078378791146, 10)
loss 368.27960205078125
Current State,action,reward,Response time,Next State:  (10, 16.147078378791146) 0 22.0 1234.16752106 (16.229253414601111, 8)
loss 366.6443786621094
Current State,action,reward,Response time,Next State:  (8, 16.229253414601111) 3 21.0 1319.55886882 (16.295120821876548, 9)
loss 369.8543395996094
Current State,action,reward,Response time,Next State:  (9, 16.295120821876548) 3 20.0 1276.0866986 (16.667936385136993, 10)
loss 370.91119384765625
Current State,action,reward,Response time,Next State:  (10, 16.667936385136993) 3 19.0 1261.79596106 (16.836383524612351, 11)
loss 375.4771423339844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 373.2049255371094
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 0 21.0 1258.27117243 (17.052961248403161, 9)
loss 366.722412109375
Current State,action,reward,Response time,Next State:  (9, 17.052961248403161) 3 20.0 1315.75590499 (17.215992726625572, 10)
loss 368.9424743652344
############ Running episode number: 145  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 368.8924255371094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 376.5855712890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 364.0573425292969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 369.91400146484375
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 1 20.0 974.154538113 (11.336751742492702, 10)
loss 371.6624450683594
Current State,action,reward,Response time,Next State:  (10, 11.336751742492702) 3 19.0 979.00811241 (11.25610796929319, 11)
loss 367.0169677734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 369.3785705566406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 366.16973876953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 368.0927734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 367.1763000488281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 365.4132080078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 368.62939453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 376.32000732421875
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 4 19.0 937.318160694 (10.644925616761762, 11)
loss 372.7069396972656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 370.1288757324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 373.85791015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 370.1752014160156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 370.7325134277344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 367.3575439453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 372.36712646484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 367.244384765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 372.5511779785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 369.8187561035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 372.393310546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 367.3504638671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 368.6088562011719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 367.6168212890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 369.258056640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 372.5195007324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 368.43878173828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 376.5648193359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 369.08447265625
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 0 21.0 909.642131904 (10.276491935146446, 9)
loss 368.0332336425781
Current State,action,reward,Response time,Next State:  (9, 10.276491935146446) 3 20.0 961.041178317 (10.236991269871366, 10)
loss 366.88909912109375
Current State,action,reward,Response time,Next State:  (10, 10.236991269871366) 1 21.0 920.672316722 (10.236272697871373, 9)
loss 373.848876953125
Current State,action,reward,Response time,Next State:  (9, 10.236272697871373) 3 20.0 958.935899728 (10.369891240151098, 10)
loss 369.0818176269531
Current State,action,reward,Response time,Next State:  (10, 10.369891240151098) 3 19.0 927.721874973 (10.316955310454549, 11)
loss 365.5479736328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 365.81976318359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 369.8773498535156
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 4 19.0 917.140709305 (10.425974763084863, 11)
loss 367.8583679199219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 369.9814147949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 368.1712951660156
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 4 19.0 931.154858096 (10.624473674922116, 11)
loss 371.8152160644531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 368.9923095703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 369.56768798828125
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 0 21.0 945.391786838 (11.039747673816453, 9)
loss 371.3750305175781
Current State,action,reward,Response time,Next State:  (9, 11.039747673816453) 3 20.0 1000.99384957 (11.271571944085663, 10)
loss 368.4384765625
Current State,action,reward,Response time,Next State:  (10, 11.271571944085663) 3 19.0 975.550709187 (11.670334358779868, 11)
loss 366.37689208984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 367.4981689453125
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 2 19.0 992.681522335 (12.19918626616789, 11)
loss 367.44830322265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 367.79327392578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 370.9031982421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 369.1475524902344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 364.6617736816406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 370.82818603515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 369.76336669921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 369.6364440917969
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 4 19.0 1204.9600972 (16.466876895473597, 11)
loss 371.57373046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 365.7240905761719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 368.49908447265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 369.119384765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 367.6144104003906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 371.00408935546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 366.66668701171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 371.57611083984375
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 1 20.0 1373.60319427 (19.286321916040979, 10)
loss 368.62200927734375
Current State,action,reward,Response time,Next State:  (10, 19.286321916040979) 3 19.0 1400.68584406 (19.340464848017284, 11)
loss 371.0023498535156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 371.1483154296875
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 1 20.0 1383.38281107 (19.140765783401285, 10)
loss 373.5060119628906
Current State,action,reward,Response time,Next State:  (10, 19.140765783401285) 3 19.0 1392.96495117 (19.385636054792762, 11)
loss 367.83642578125
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 1 20.0 1392.48057747 (19.223969507401588, 10)
loss 370.7333984375
Current State,action,reward,Response time,Next State:  (10, 19.223969507401588) 3 19.0 1397.37841716 (19.25591252280865, 11)
loss 369.72198486328125
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 2 19.0 1385.62570908 (19.08360399753829, 11)
loss 363.8970947265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 370.0906982421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 367.51348876953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 371.465087890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 370.2982177734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 368.63818359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 367.5425109863281
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 1 20.0 1226.10492247 (15.950694610794756, 10)
loss 372.0101013183594
Current State,action,reward,Response time,Next State:  (10, 15.950694610794756) 3 19.0 1223.7505224 (15.828704162850809, 11)
loss 370.53564453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 369.5945129394531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 372.8291320800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 375.2264709472656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 369.8831787109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 365.52154541015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 368.1209716796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 368.6597595214844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 368.5430603027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 366.59014892578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 369.3205261230469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 366.9657287597656
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 2 19.0 1219.63501821 (16.147078378791146, 11)
loss 365.61724853515625
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 2 19.0 1221.34827555 (16.229253414601111, 11)
loss 368.827880859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 371.2958068847656
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 1 20.0 1229.17115431 (16.667936385136993, 10)
loss 365.160400390625
Current State,action,reward,Response time,Next State:  (10, 16.667936385136993) 3 19.0 1261.79596106 (16.836383524612351, 11)
loss 365.7542724609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 370.8700256347656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 369.29718017578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 368.9986877441406
############ Running episode number: 146  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 367.85272216796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 369.7396240234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 371.41668701171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 363.90185546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 371.7485656738281
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 2 19.0 967.160346038 (11.25610796929319, 11)
loss 369.66925048828125
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 0 21.0 962.898956888 (11.027107764209074, 9)
loss 373.10247802734375
Current State,action,reward,Response time,Next State:  (9, 11.027107764209074) 3 20.0 1000.33221268 (10.995673623987257, 10)
loss 371.66534423828125
Current State,action,reward,Response time,Next State:  (10, 10.995673623987257) 3 19.0 960.915933313 (10.931193889570471, 11)
loss 370.9998474121094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 367.17010498046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 369.1205749511719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 365.8475646972656
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 1 20.0 937.12351295 (10.772009508959538, 10)
loss 374.5740661621094
Current State,action,reward,Response time,Next State:  (10, 10.772009508959538) 3 19.0 949.051873418 (10.644925616761762, 11)
loss 365.8429870605469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 367.1946105957031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 370.2917785644531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 372.341064453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 365.7707824707031
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 2 19.0 922.369964659 (10.448897752470936, 11)
loss 372.7005615234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 370.0445251464844
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 0 21.0 919.412094444 (10.44185150623065, 9)
loss 367.82501220703125
Current State,action,reward,Response time,Next State:  (9, 10.44185150623065) 3 20.0 969.696935814 (10.370942817486826, 10)
loss 375.03570556640625
Current State,action,reward,Response time,Next State:  (10, 10.370942817486826) 3 19.0 927.777654938 (10.42733414151318, 11)
loss 366.3638610839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 369.4367980957031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 368.52581787109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 373.2091979980469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 367.8141174316406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 366.27276611328125
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 2 19.0 911.223233653 (10.268274366284802, 11)
loss 367.93231201171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 367.31781005859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 370.3929443359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 372.7990417480469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 371.7530822753906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 369.74725341796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 364.4143981933594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 369.7756652832031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 369.1530456542969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 366.5810546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 367.6127624511719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 365.363037109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 365.7926025390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 366.69512939453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 370.4606628417969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 371.65753173828125
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 0 21.0 937.284736847 (10.924797168745895, 9)
loss 368.5354919433594
Current State,action,reward,Response time,Next State:  (9, 10.924797168745895) 0 23.0 994.97675791 (11.039747673816453, 7)
loss 371.1240234375
Current State,action,reward,Response time,Next State:  (7, 11.039747673816453) 3 22.0 1063.7901361 (11.271571944085663, 8)
loss 368.0522155761719
Current State,action,reward,Response time,Next State:  (8, 11.271571944085663) 3 21.0 1029.8081916 (11.670334358779868, 9)
loss 371.2868957519531
Current State,action,reward,Response time,Next State:  (9, 11.670334358779868) 3 20.0 1034.00195058 (11.819721938468785, 10)
loss 373.488525390625
Current State,action,reward,Response time,Next State:  (10, 11.819721938468785) 3 19.0 1004.62682792 (12.19918626616789, 11)
loss 366.2212829589844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 367.57928466796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 369.8735656738281
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 2 19.0 1063.96010023 (13.649658108197247, 11)
loss 372.7181396484375
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 1 20.0 1089.37925646 (14.283719188889453, 10)
loss 370.7645568847656
Current State,action,reward,Response time,Next State:  (10, 14.283719188889453) 3 19.0 1135.32732476 (14.677479537099185, 11)
loss 367.42559814453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 367.68902587890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 368.7331848144531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 366.6870422363281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 371.1044616699219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 369.75689697265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 370.6922912597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 374.5044860839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 365.02484130859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 364.65936279296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 370.0118408203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 366.27142333984375
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 4 19.0 1387.23260634 (19.340464848017284, 11)
loss 371.0789489746094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 366.1244812011719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 369.5455322265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 374.8091125488281
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 4 19.0 1392.48057747 (19.223969507401588, 11)
loss 370.16949462890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 371.0675354003906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 372.08319091796875
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 1 20.0 1376.52055872 (18.668181536495972, 10)
loss 371.7374267578125
Current State,action,reward,Response time,Next State:  (10, 18.668181536495972) 3 19.0 1367.89714889 (18.375894992990247, 11)
loss 368.3542175292969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 367.6878967285156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 368.75482177734375
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 1 20.0 1278.56065924 (16.84211602880065, 10)
loss 367.10015869140625
Current State,action,reward,Response time,Next State:  (10, 16.84211602880065) 2 20.0 1271.03516211 (16.237094554670044, 10)
loss 371.9118347167969
Current State,action,reward,Response time,Next State:  (10, 16.237094554670044) 3 19.0 1238.94234737 (15.950694610794756, 11)
loss 369.6585998535156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 366.694091796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 367.19390869140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 371.72857666015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 369.4124450683594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 367.2686767578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 366.67327880859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 368.16217041015625
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 0 21.0 1207.88915169 (15.954793861767499, 9)
loss 369.1026611328125
Current State,action,reward,Response time,Next State:  (9, 15.954793861767499) 3 20.0 1258.27226176 (16.004586266677634, 10)
loss 367.6898498535156
Current State,action,reward,Response time,Next State:  (10, 16.004586266677634) 3 19.0 1226.60915635 (16.017694914042416, 11)
loss 371.5992736816406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 370.5595397949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 370.7592468261719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 371.1175537109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 371.14788818359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 373.75921630859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 366.5689697265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 372.7385559082031
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 1 20.0 1257.77263112 (16.845818065953559, 10)
loss 368.6927795410156
Current State,action,reward,Response time,Next State:  (10, 16.845818065953559) 3 19.0 1271.23153331 (17.052961248403161, 11)
loss 370.6849365234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 369.3274230957031
############ Running episode number: 147  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 2 20.0 1029.06659875 (11.973514343585284, 10)
loss 366.0275573730469
Current State,action,reward,Response time,Next State:  (10, 11.973514343585284) 3 19.0 1012.7846064 (11.786394321941378, 11)
loss 369.2615966796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 369.4738464355469
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 2 19.0 982.049698353 (11.469111876584304, 11)
loss 368.1095275878906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 369.96728515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 372.29962158203125
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 0 21.0 962.898956888 (11.027107764209074, 9)
loss 366.56927490234375
Current State,action,reward,Response time,Next State:  (9, 11.027107764209074) 3 20.0 1000.33221268 (10.995673623987257, 10)
loss 369.6566467285156
Current State,action,reward,Response time,Next State:  (10, 10.995673623987257) 3 19.0 960.915933313 (10.931193889570471, 11)
loss 368.7812194824219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 365.5171813964844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 362.0074157714844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 369.2185363769531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 366.7626953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 362.3021545410156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 371.41400146484375
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 2 19.0 927.560809977 (10.552868829802469, 11)
loss 370.60174560546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 367.7938537597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 372.5877685546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 366.4118347167969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 369.3257141113281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 369.9994201660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 368.5157470703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 373.1835632324219
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 0 21.0 919.104778396 (10.388469398680568, 9)
loss 371.51434326171875
Current State,action,reward,Response time,Next State:  (9, 10.388469398680568) 3 20.0 966.902645924 (10.344006106602812, 10)
loss 369.35302734375
Current State,action,reward,Response time,Next State:  (10, 10.344006106602812) 3 19.0 926.348821567 (10.319026962956018, 11)
loss 364.6484680175781
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 0 21.0 913.381595845 (10.30224719189987, 9)
loss 370.9216003417969
Current State,action,reward,Response time,Next State:  (9, 10.30224719189987) 2 21.0 962.389338906 (10.278181486298042, 9)
loss 367.6797180175781
Current State,action,reward,Response time,Next State:  (9, 10.278181486298042) 3 20.0 961.129617982 (10.268274366284802, 10)
loss 370.9197998046875
Current State,action,reward,Response time,Next State:  (10, 10.268274366284802) 3 19.0 922.331700166 (10.335411397720526, 11)
loss 369.3514099121094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 374.0643005371094
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 1 20.0 912.67468196 (10.24826025489064, 10)
loss 370.9039001464844
Current State,action,reward,Response time,Next State:  (10, 10.24826025489064) 3 19.0 921.2700698 (10.276491935146446, 11)
loss 367.47894287109375
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 2 19.0 911.133954163 (10.236991269871366, 11)
loss 365.1686706542969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 363.71435546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 373.84344482421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 366.5458984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 367.54034423828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 375.20220947265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 369.38092041015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 367.137451171875
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 2 19.0 925.376677007 (10.655373370049301, 11)
loss 367.11517333984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 370.0274658203125
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 2 19.0 929.522052234 (10.771376986314287, 11)
loss 369.4951171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 369.0096435546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 372.3586120605469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 369.53997802734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 373.0559997558594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 369.4660339355469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 376.8381042480469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 367.95794677734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 371.6880187988281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 366.2958068847656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 371.7324523925781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 367.3127746582031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 374.2283935546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 367.71514892578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 371.1178283691406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 366.9853515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 367.3027648925781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 372.4888610839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 371.0822448730469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 367.0061340332031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 366.7963562011719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 370.0890808105469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 366.7984313964844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 366.07666015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 368.8185119628906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 369.231201171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 367.06658935546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 372.7099304199219
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 0 21.0 1383.93777195 (19.25591252280865, 9)
loss 367.7180480957031
Current State,action,reward,Response time,Next State:  (9, 19.25591252280865) 3 20.0 1431.06953264 (19.08360399753829, 10)
loss 365.5888671875
Current State,action,reward,Response time,Next State:  (10, 19.08360399753829) 3 19.0 1389.93285614 (18.668181536495972, 11)
loss 370.9451904296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 369.7345275878906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 367.45196533203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 365.5440368652344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 371.1938781738281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 370.7948913574219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 370.0226135253906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 370.1053771972656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 368.1166076660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 366.4164733886719
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 4 19.0 1184.33851965 (15.750501603468638, 11)
loss 375.4913024902344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 372.912841796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 367.5113525390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 365.60113525390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 370.434814453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 370.8899841308594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 368.203857421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 370.21478271484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 370.4226989746094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 369.98291015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 372.17755126953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 364.2966613769531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 372.93853759765625
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 1 20.0 1248.87152463 (16.836383524612351, 10)
loss 370.8903503417969
Current State,action,reward,Response time,Next State:  (10, 16.836383524612351) 3 19.0 1270.73108663 (16.845818065953559, 11)
loss 368.33050537109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 368.4842529296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 365.6094665527344
############ Running episode number: 148  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 365.5451354980469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 365.67578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 371.7582092285156
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 4 19.0 982.049698353 (11.469111876584304, 11)
loss 369.676025390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 372.96966552734375
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 0 21.0 967.160346038 (11.25610796929319, 9)
loss 368.6193542480469
Current State,action,reward,Response time,Next State:  (9, 11.25610796929319) 3 20.0 1012.3192433 (11.027107764209074, 10)
loss 365.6395263671875
Current State,action,reward,Response time,Next State:  (10, 11.027107764209074) 3 19.0 962.583328739 (10.995673623987257, 11)
loss 367.6060791015625
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 4 19.0 949.137050055 (10.931193889570471, 11)
loss 371.1803283691406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 370.84344482421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 369.82720947265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 372.2804870605469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 372.290283203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 366.78253173828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 370.40228271484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 366.6180114746094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 363.80572509765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 366.8729553222656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 371.54876708984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 375.3952941894531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 371.0611572265625
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 2 19.0 919.871906942 (10.370942817486826, 11)
loss 371.87115478515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 367.7142028808594
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 4 19.0 919.104778396 (10.388469398680568, 11)
loss 368.26239013671875
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 2 19.0 917.051082408 (10.344006106602812, 11)
loss 368.2126770019531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 371.8501281738281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 369.75042724609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 367.9586486816406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 364.6696472167969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 371.9136047363281
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 2 19.0 914.247384359 (10.305649118067803, 11)
loss 367.60260009765625
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 4 19.0 912.67468196 (10.24826025489064, 11)
loss 369.2175598144531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 367.5527648925781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 363.9130859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 368.1148681640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 368.42706298828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 371.9217529296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 369.79742431640625
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 4 19.0 914.152581784 (10.390165524255663, 11)
loss 375.6846923828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 370.06939697265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 371.4352111816406
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 2 19.0 925.376677007 (10.655373370049301, 11)
loss 366.85711669921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 369.4787902832031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 374.1998596191406
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 4 19.0 937.284736847 (10.924797168745895, 11)
loss 367.1421203613281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 365.2298583984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 372.1629943847656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 366.88714599609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 369.9190368652344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 365.79205322265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 366.7964172363281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 370.67572021484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 368.3370666503906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 367.72491455078125
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 0 21.0 1122.88439768 (14.677479537099185, 9)
loss 364.9217834472656
Current State,action,reward,Response time,Next State:  (9, 14.677479537099185) 3 20.0 1191.41116041 (15.353965082180355, 10)
loss 371.7498474121094
Current State,action,reward,Response time,Next State:  (10, 15.353965082180355) 3 19.0 1192.09754638 (15.836943704090487, 11)
loss 369.69549560546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 369.20257568359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 365.3239440917969
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 1 20.0 1259.63387034 (17.534967586021782, 10)
loss 368.5602111816406
Current State,action,reward,Response time,Next State:  (10, 17.534967586021782) 3 19.0 1307.78684385 (17.669285735563751, 11)
loss 367.9885559082031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 368.7491760253906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 367.6405944824219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 368.3158264160156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 370.2420654296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 363.7713928222656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 369.50421142578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 365.23858642578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 367.0836181640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 367.79962158203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 365.57427978515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 368.59942626953125
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 1 20.0 1385.62570908 (19.08360399753829, 10)
loss 367.0231018066406
Current State,action,reward,Response time,Next State:  (10, 19.08360399753829) 3 19.0 1389.93285614 (18.668181536495972, 11)
loss 370.4111328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 364.7453308105469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 371.0123596191406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 370.2445983886719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 367.77105712890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 369.8386535644531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 367.8902587890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 366.74920654296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 366.1117858886719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 367.3988037109375
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 0 21.0 1184.33851965 (15.750501603468638, 9)
loss 366.3464050292969
Current State,action,reward,Response time,Next State:  (9, 15.750501603468638) 3 20.0 1247.57857022 (15.817158911312735, 10)
loss 369.25396728515625
Current State,action,reward,Response time,Next State:  (10, 15.817158911312735) 3 19.0 1216.66724247 (15.829956988360925, 11)
loss 366.2038269042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 371.9264831542969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 364.291259765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 366.3902282714844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 368.2090759277344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 368.4522705078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 370.6280212402344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 369.01763916015625
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 1 20.0 1221.34827555 (16.229253414601111, 10)
loss 368.046142578125
Current State,action,reward,Response time,Next State:  (10, 16.229253414601111) 3 19.0 1238.52642122 (16.295120821876548, 11)
loss 364.4096374511719
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 4 19.0 1229.17115431 (16.667936385136993, 11)
loss 369.53607177734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 366.1943054199219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 365.77593994140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 365.2540588378906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 365.87335205078125
############ Running episode number: 149  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 366.1886291503906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 368.6831359863281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 366.784423828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 365.08447265625
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 2 19.0 974.154538113 (11.336751742492702, 11)
loss 369.10162353515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 363.2398376464844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 362.17919921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 366.2682189941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 368.48834228515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 367.52520751953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 370.2995910644531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 367.2948913574219
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 4 19.0 937.12351295 (10.772009508959538, 11)
loss 365.0108642578125
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 0 21.0 937.318160694 (10.644925616761762, 9)
loss 365.744873046875
Current State,action,reward,Response time,Next State:  (9, 10.644925616761762) 3 20.0 980.32686333 (10.58735855349979, 10)
loss 368.7601318359375
Current State,action,reward,Response time,Next State:  (10, 10.58735855349979) 3 19.0 939.257231149 (10.552868829802469, 11)
loss 369.4913330078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 372.469970703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 365.9504089355469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 366.3002624511719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 369.81768798828125
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 0 21.0 919.412094444 (10.44185150623065, 9)
loss 369.5399169921875
Current State,action,reward,Response time,Next State:  (9, 10.44185150623065) 3 20.0 969.696935814 (10.370942817486826, 10)
loss 367.54595947265625
Current State,action,reward,Response time,Next State:  (10, 10.370942817486826) 3 19.0 927.777654938 (10.42733414151318, 11)
loss 364.82952880859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 372.38824462890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 367.35003662109375
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 2 19.0 914.701547126 (10.319026962956018, 11)
loss 368.9866027832031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 371.0948181152344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 366.75238037109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 367.89642333984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 368.88861083984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 369.97357177734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 365.4123229980469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 368.7061462402344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 366.1710510253906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 372.966552734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 371.9645690917969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 368.1536560058594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 368.9854736328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 369.68658447265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 364.42578125
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 0 21.0 919.032945938 (10.546025383098053, 9)
loss 364.8509521484375
Current State,action,reward,Response time,Next State:  (9, 10.546025383098053) 3 20.0 975.14992417 (10.655373370049301, 10)
loss 372.4563293457031
Current State,action,reward,Response time,Next State:  (10, 10.655373370049301) 3 19.0 942.865015335 (10.624473674922116, 11)
loss 373.16656494140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 368.4712829589844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 364.33990478515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 366.23345947265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 367.7017517089844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 365.7950439453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 367.46392822265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 365.6590576171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 366.6897277832031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 368.8680725097656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 368.4197998046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 368.3265380859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 366.5397033691406
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 0 21.0 1143.69153516 (15.353965082180355, 9)
loss 365.9914245605469
Current State,action,reward,Response time,Next State:  (9, 15.353965082180355) 3 20.0 1226.82184023 (15.836943704090487, 10)
loss 365.8187255859375
Current State,action,reward,Response time,Next State:  (10, 15.836943704090487) 1 21.0 1217.71670884 (16.466876895473597, 9)
loss 364.6838073730469
Current State,action,reward,Response time,Next State:  (9, 16.466876895473597) 4 19.0 1285.07728144 (16.871606159345866, 11)
loss 369.24365234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 373.7586975097656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 367.0283508300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 365.58233642578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 370.0456237792969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 369.2066345214844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 363.7154541015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 368.2597961425781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 367.09808349609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 366.4633483886719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 368.4273986816406
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 1 20.0 1379.54110953 (19.385636054792762, 10)
loss 367.1913146972656
Current State,action,reward,Response time,Next State:  (10, 19.385636054792762) 3 19.0 1405.95387237 (19.223969507401588, 11)
loss 367.1774597167969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 366.3927307128906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 364.9996643066406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 369.5416259765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 366.04547119140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 368.6803283691406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 368.11505126953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 366.0820617675781
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 4 19.0 1258.07554888 (16.237094554670044, 11)
loss 364.63409423828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 367.9318542480469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 370.180908203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 366.1916198730469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 368.1650085449219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 366.52838134765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 366.5533142089844
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 2 19.0 1203.91462651 (15.829956988360925, 11)
loss 367.70220947265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 369.1911315917969
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 4 19.0 1207.88915169 (15.954793861767499, 11)
loss 368.2351379394531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 366.51971435546875
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 4 19.0 1213.81868812 (16.017694914042416, 11)
loss 366.7445373535156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 367.6597900390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 369.6472473144531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 367.0646057128906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 367.6616516113281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 369.54913330078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 364.0809020996094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 369.74664306640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 367.6396179199219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 367.85919189453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 366.9444580078125
############ Running episode number: 150  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 368.0956726074219
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 0 21.0 1000.80824137 (11.786394321941378, 9)
loss 366.89923095703125
Current State,action,reward,Response time,Next State:  (9, 11.786394321941378) 3 20.0 1040.0771169 (11.61852219546234, 10)
loss 369.6166076660156
Current State,action,reward,Response time,Next State:  (10, 11.61852219546234) 3 19.0 993.95437024 (11.469111876584304, 11)
loss 366.7597351074219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 367.2100830078125
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 1 20.0 967.160346038 (11.25610796929319, 10)
loss 364.6142272949219
Current State,action,reward,Response time,Next State:  (10, 11.25610796929319) 3 19.0 974.730436685 (11.027107764209074, 11)
loss 370.3978271484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 368.621826171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 368.7312316894531
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 1 20.0 945.729803224 (10.816918347608043, 10)
loss 368.610595703125
Current State,action,reward,Response time,Next State:  (10, 10.816918347608043) 3 19.0 951.434021987 (10.819208572963639, 11)
loss 370.5567932128906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 363.37933349609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 370.8728332519531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 365.2154846191406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 366.2626953125
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 2 19.0 927.560809977 (10.552868829802469, 11)
loss 368.92291259765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 366.6719970703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 370.988525390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 367.0496520996094
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 1 20.0 920.244245637 (10.433149880183072, 10)
loss 366.8569641113281
Current State,action,reward,Response time,Next State:  (10, 10.433149880183072) 3 19.0 931.077372094 (10.44185150623065, 11)
loss 363.920166015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 365.40509033203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 365.3589172363281
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 0 21.0 919.104778396 (10.388469398680568, 9)
loss 368.67938232421875
Current State,action,reward,Response time,Next State:  (9, 10.388469398680568) 3 20.0 966.902645924 (10.344006106602812, 10)
loss 368.12078857421875
Current State,action,reward,Response time,Next State:  (10, 10.344006106602812) 3 19.0 926.348821567 (10.319026962956018, 11)
loss 367.1205139160156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 368.276123046875
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 1 20.0 912.494916918 (10.278181486298042, 10)
loss 366.61785888671875
Current State,action,reward,Response time,Next State:  (10, 10.278181486298042) 3 19.0 922.857214352 (10.268274366284802, 11)
loss 366.0064697265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 371.3023376464844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 365.9151611328125
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 0 21.0 912.67468196 (10.24826025489064, 9)
loss 364.747314453125
Current State,action,reward,Response time,Next State:  (9, 10.24826025489064) 3 20.0 959.563389179 (10.276491935146446, 10)
loss 368.5111999511719
Current State,action,reward,Response time,Next State:  (10, 10.276491935146446) 3 19.0 922.767593645 (10.236991269871366, 11)
loss 370.30377197265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 364.4015808105469
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 1 20.0 909.008683798 (10.369891240151098, 10)
loss 366.38067626953125
Current State,action,reward,Response time,Next State:  (10, 10.369891240151098) 3 19.0 927.721874973 (10.316955310454549, 11)
loss 368.6349182128906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 369.1412658691406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 366.0329895019531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 369.4244384765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 368.1025085449219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 365.858154296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 367.3826904296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 368.1199035644531
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 2 19.0 937.284736847 (10.924797168745895, 11)
loss 369.43231201171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 368.9683532714844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 365.522705078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 369.4325866699219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 364.58599853515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 371.1387023925781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 366.44781494140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 366.7033996582031
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 1 20.0 1063.96010023 (13.649658108197247, 10)
loss 366.29888916015625
Current State,action,reward,Response time,Next State:  (10, 13.649658108197247) 3 19.0 1101.69413046 (14.283719188889453, 11)
loss 363.4549560546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 363.2492370605469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 364.44671630859375
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 2 19.0 1179.43847566 (15.836943704090487, 11)
loss 370.23724365234375
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 2 19.0 1204.9600972 (16.466876895473597, 11)
loss 364.2711486816406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 366.1859436035156
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 2 19.0 1259.63387034 (17.534967586021782, 11)
loss 369.4530944824219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 366.2337951660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 365.10906982421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 367.6368103027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 366.0772705078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 366.352783203125
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 1 20.0 1373.60319427 (19.286321916040979, 10)
loss 366.0465393066406
Current State,action,reward,Response time,Next State:  (10, 19.286321916040979) 3 19.0 1400.68584406 (19.340464848017284, 11)
loss 370.126708984375
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 1 20.0 1390.09363446 (19.213467265587269, 10)
loss 364.0029602050781
Current State,action,reward,Response time,Next State:  (10, 19.213467265587269) 3 19.0 1396.82133527 (19.140765783401285, 11)
loss 362.79547119140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 367.22467041015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 364.3744812011719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 368.8697814941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 367.94671630859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 366.5716552734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 369.6056213378906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 364.8605651855469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 362.5730895996094
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 4 19.0 1278.56065924 (16.84211602880065, 11)
loss 365.59222412109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 367.7635192871094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 364.70916748046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 366.62652587890625
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 4 19.0 1204.52470225 (15.550833128512703, 11)
loss 366.0761413574219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 372.2811584472656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 362.1597900390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 366.1681823730469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 364.57073974609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 365.6537780761719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 366.7839660644531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 367.2057800292969
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 4 19.0 1213.81868812 (16.017694914042416, 11)
loss 367.8921203613281
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 2 19.0 1214.51137704 (15.947547279389703, 11)
loss 364.9145202636719
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 2 19.0 1210.80462626 (16.11465619633363, 11)
loss 367.58856201171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 366.14483642578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 368.072509765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 367.4813232421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 366.4402770996094
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 1 20.0 1248.87152463 (16.836383524612351, 10)
loss 368.7754211425781
Current State,action,reward,Response time,Next State:  (10, 16.836383524612351) 3 19.0 1270.73108663 (16.845818065953559, 11)
loss 368.4906005859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 364.9790954589844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 367.35198974609375
############ Running episode number: 151  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 368.1043395996094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 364.58758544921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 364.15264892578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 364.978271484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 365.5562744140625
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 4 19.0 967.160346038 (11.25610796929319, 11)
loss 366.10565185546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 364.5409851074219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 366.7935485839844
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 2 19.0 949.137050055 (10.931193889570471, 11)
loss 366.0924987792969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 370.1150817871094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 366.4252014160156
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 1 20.0 939.812260006 (10.768325938188134, 10)
loss 365.9247741699219
Current State,action,reward,Response time,Next State:  (10, 10.768325938188134) 3 19.0 948.856481751 (10.772009508959538, 11)
loss 365.716796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 372.7232360839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 366.5032043457031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 362.299072265625
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 0 21.0 925.738299342 (10.553846649940214, 9)
loss 365.72491455078125
Current State,action,reward,Response time,Next State:  (9, 10.553846649940214) 3 20.0 975.559328891 (10.489125480251131, 10)
loss 369.0765075683594
Current State,action,reward,Response time,Next State:  (10, 10.489125480251131) 3 19.0 934.046546974 (10.448897752470936, 11)
loss 366.0295104980469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 367.92718505859375
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 4 19.0 919.412094444 (10.44185150623065, 11)
loss 364.7934875488281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 366.3062438964844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 365.08355712890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 365.5013732910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 362.0080261230469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 365.6346435546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 367.33575439453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 370.4908142089844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 366.5442199707031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 363.9136657714844
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 0 21.0 914.247384359 (10.305649118067803, 9)
loss 365.4919738769531
Current State,action,reward,Response time,Next State:  (9, 10.305649118067803) 3 20.0 962.567412952 (10.24826025489064, 10)
loss 367.4085998535156
Current State,action,reward,Response time,Next State:  (10, 10.24826025489064) 3 19.0 921.2700698 (10.276491935146446, 11)
loss 363.7235107421875
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 0 21.0 911.133954163 (10.236991269871366, 9)
loss 368.6999816894531
Current State,action,reward,Response time,Next State:  (9, 10.236991269871366) 3 20.0 958.973513426 (10.236272697871373, 10)
loss 363.3656005859375
Current State,action,reward,Response time,Next State:  (10, 10.236272697871373) 3 19.0 920.634200723 (10.369891240151098, 11)
loss 368.5161437988281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 363.9976806640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 365.33660888671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 367.843994140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 368.67974853515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 363.6216125488281
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 4 19.0 925.376677007 (10.655373370049301, 11)
loss 367.7414245605469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 365.90313720703125
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 2 19.0 929.522052234 (10.771376986314287, 11)
loss 368.2479553222656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 366.3427429199219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 364.8819885253906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 364.61529541015625
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 1 20.0 963.716106332 (11.670334358779868, 10)
loss 366.0720520019531
Current State,action,reward,Response time,Next State:  (10, 11.670334358779868) 1 21.0 996.702699398 (11.819721938468785, 9)
loss 367.89031982421875
Current State,action,reward,Response time,Next State:  (9, 11.819721938468785) 3 20.0 1041.82165315 (12.19918626616789, 10)
loss 369.19873046875
Action +2 not possible so Scaled up by 1
Current State,action,reward,Response time,Next State:  (10, 12.19918626616789) 4 19.0 1024.75516863 (12.501496275411796, 11)
loss 366.87286376953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 367.92474365234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 364.973876953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 366.3847961425781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 367.0469665527344
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 4 19.0 1143.69153516 (15.353965082180355, 11)
loss 368.37939453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 368.7659606933594
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 0 21.0 1204.9600972 (16.466876895473597, 9)
loss 366.5666198730469
Current State,action,reward,Response time,Next State:  (9, 16.466876895473597) 3 20.0 1285.07728144 (16.871606159345866, 10)
loss 367.4391784667969
Current State,action,reward,Response time,Next State:  (10, 16.871606159345866) 2 20.0 1272.5994393 (17.534967586021782, 10)
loss 364.6032409667969
Current State,action,reward,Response time,Next State:  (10, 17.534967586021782) 3 19.0 1307.78684385 (17.669285735563751, 11)
loss 363.55078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 369.3008728027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 364.02398681640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 364.1159362792969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 367.2760925292969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 367.80157470703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 370.0198669433594
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 4 19.0 1390.09363446 (19.213467265587269, 11)
loss 368.3157958984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 366.8671569824219
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 2 19.0 1379.54110953 (19.385636054792762, 11)
loss 365.6236572265625
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 1 20.0 1392.48057747 (19.223969507401588, 10)
loss 365.0911865234375
Current State,action,reward,Response time,Next State:  (10, 19.223969507401588) 3 19.0 1397.37841716 (19.25591252280865, 11)
loss 366.947509765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 370.2814636230469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 368.50750732421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 368.4135437011719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 368.4438781738281
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 4 19.0 1310.13203606 (17.229782241685768, 11)
loss 366.8794860839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 365.3087463378906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 367.56787109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 364.7518005371094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 370.2787780761719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 369.02783203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 365.7825927734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 367.79144287109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 366.1488037109375
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 1 20.0 1203.91462651 (15.829956988360925, 10)
loss 367.32440185546875
Current State,action,reward,Response time,Next State:  (10, 15.829956988360925) 2 20.0 1217.34610485 (15.892373986997768, 10)
loss 367.6124572753906
Current State,action,reward,Response time,Next State:  (10, 15.892373986997768) 3 19.0 1220.65695786 (15.954793861767499, 11)
loss 364.1185302734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 371.3606262207031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 365.029296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 372.3089904785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 365.7403259277344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 367.5191955566406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 372.43609619140625
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 4 19.0 1225.69057988 (16.295120821876548, 11)
loss 366.99066162109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 366.0078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 366.3566589355469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 366.0612487792969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 366.2190856933594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 368.548828125
############ Running episode number: 152  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 365.2676696777344
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 4 19.0 1000.80824137 (11.786394321941378, 11)
loss 366.5047912597656
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 2 19.0 990.920419923 (11.61852219546234, 11)
loss 371.3926696777344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 365.52435302734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 368.4799499511719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 368.73614501953125
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 1 20.0 962.898956888 (11.027107764209074, 10)
loss 367.5631103515625
Current State,action,reward,Response time,Next State:  (10, 11.027107764209074) 3 19.0 962.583328739 (10.995673623987257, 11)
loss 362.48687744140625
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 0 21.0 949.137050055 (10.931193889570471, 9)
loss 367.4358215332031
Current State,action,reward,Response time,Next State:  (9, 10.931193889570471) 3 20.0 995.311594677 (10.816918347608043, 10)
loss 367.4765930175781
Current State,action,reward,Response time,Next State:  (10, 10.816918347608043) 3 19.0 951.434021987 (10.819208572963639, 11)
loss 367.0541076660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 366.64239501953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 368.5108642578125
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 1 20.0 937.318160694 (10.644925616761762, 10)
loss 365.80596923828125
Current State,action,reward,Response time,Next State:  (10, 10.644925616761762) 3 19.0 942.310823749 (10.58735855349979, 11)
loss 366.1150817871094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 361.6546325683594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 364.4689636230469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 366.57269287109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 368.69219970703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 364.9175109863281
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 2 19.0 919.412094444 (10.44185150623065, 11)
loss 370.43658447265625
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 4 19.0 919.871906942 (10.370942817486826, 11)
loss 370.3951416015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 367.7660217285156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 369.55987548828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 373.4676208496094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 366.94573974609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 365.41143798828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 362.34405517578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 369.27691650390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 364.51007080078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 363.2592468261719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 366.9068298339844
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 2 19.0 909.642131904 (10.276491935146446, 11)
loss 366.74700927734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 367.0613708496094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 365.318359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 362.8132629394531
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 4 19.0 916.069372847 (10.316955310454549, 11)
loss 366.27423095703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 365.4850769042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 366.0637512207031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 365.3440246582031
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 1 20.0 919.032945938 (10.546025383098053, 10)
loss 365.489501953125
Current State,action,reward,Response time,Next State:  (10, 10.546025383098053) 3 19.0 937.064750655 (10.655373370049301, 11)
loss 368.47381591796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 368.77734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 367.55267333984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 365.18902587890625
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 2 19.0 945.391786838 (11.039747673816453, 11)
loss 367.1242980957031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 363.9981689453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 364.87884521484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 363.2850646972656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 364.5344543457031
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 4 19.0 1012.73322757 (12.501496275411796, 11)
loss 364.6023864746094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 364.6471862792969
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 2 19.0 1063.96010023 (13.649658108197247, 11)
loss 365.3840637207031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 362.201171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 364.09332275390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 363.900146484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 368.2352294921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 363.1751403808594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 366.8070983886719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 365.93255615234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 363.6024169921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 372.5853576660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 365.5356750488281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 369.8853759765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 364.41815185546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 367.70745849609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 362.29754638671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 368.8563537597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 364.198486328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 363.8177185058594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 367.24072265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 366.1839904785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 363.70635986328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 366.7682800292969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 368.1617736816406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 366.96466064453125
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 0 21.0 1310.13203606 (17.229782241685768, 9)
loss 368.84881591796875
Current State,action,reward,Response time,Next State:  (9, 17.229782241685768) 3 20.0 1325.01161138 (16.84211602880065, 10)
loss 366.3698425292969
Current State,action,reward,Response time,Next State:  (10, 16.84211602880065) 3 19.0 1271.03516211 (16.237094554670044, 11)
loss 369.5572509765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 364.9703063964844
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 1 20.0 1210.97093797 (15.828704162850809, 10)
loss 368.17291259765625
Current State,action,reward,Response time,Next State:  (10, 15.828704162850809) 3 19.0 1217.27964986 (15.550833128512703, 11)
loss 365.00115966796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 366.8167724609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 365.85614013671875
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 2 19.0 1200.39231205 (15.817158911312735, 11)
loss 362.0288391113281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 366.086669921875
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 2 19.0 1204.59090422 (15.892373986997768, 11)
loss 363.000244140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 364.43353271484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 366.1620788574219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 365.8399353027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 365.2408447265625
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 4 19.0 1210.80462626 (16.11465619633363, 11)
loss 365.5301513671875
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 1 20.0 1219.63501821 (16.147078378791146, 10)
loss 367.0374450683594
Current State,action,reward,Response time,Next State:  (10, 16.147078378791146) 3 19.0 1234.16752106 (16.229253414601111, 11)
loss 366.30523681640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 366.5348205566406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 366.7286376953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 365.3434753417969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 365.3887634277344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 364.272216796875
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 4 19.0 1269.21706044 (17.215992726625572, 11)
loss 366.8106384277344
############ Running episode number: 153  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 368.8406066894531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 367.08233642578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 370.3036804199219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 367.78057861328125
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 2 19.0 974.154538113 (11.336751742492702, 11)
loss 363.412109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 364.33990478515625
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 4 19.0 962.898956888 (11.027107764209074, 11)
loss 366.53350830078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 368.2298889160156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 368.344482421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 366.8909912109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 367.7250061035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 369.86810302734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 362.4260559082031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 366.2733459472656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 366.1297912597656
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 0 21.0 927.560809977 (10.552868829802469, 9)
loss 363.2885437011719
Current State,action,reward,Response time,Next State:  (9, 10.552868829802469) 3 20.0 975.508144832 (10.553846649940214, 10)
loss 363.1359558105469
Current State,action,reward,Response time,Next State:  (10, 10.553846649940214) 3 19.0 937.479622653 (10.489125480251131, 11)
loss 363.098388671875
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 2 19.0 922.369964659 (10.448897752470936, 11)
loss 365.2066345214844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 366.020263671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 367.01922607421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 371.5501708984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 366.9729309082031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 363.17901611328125
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 2 19.0 917.051082408 (10.344006106602812, 11)
loss 365.7503967285156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 363.0292053222656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 369.30133056640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 363.5941467285156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 366.48016357421875
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 2 19.0 910.69972028 (10.335411397720526, 11)
loss 369.6763610839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 368.9152526855469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 365.57000732421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 368.6450500488281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 366.2091369628906
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 0 21.0 909.046654676 (10.236272697871373, 9)
loss 367.3495178222656
Current State,action,reward,Response time,Next State:  (9, 10.236272697871373) 3 20.0 958.935899728 (10.369891240151098, 10)
loss 365.1638488769531
Current State,action,reward,Response time,Next State:  (10, 10.369891240151098) 3 19.0 927.721874973 (10.316955310454549, 11)
loss 364.59320068359375
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 1 20.0 913.272125304 (10.333617326102203, 10)
loss 364.161865234375
Current State,action,reward,Response time,Next State:  (10, 10.333617326102203) 3 19.0 925.797758139 (10.390165524255663, 11)
loss 367.7010803222656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 362.9152526855469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 364.9961242675781
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 0 21.0 925.376677007 (10.655373370049301, 9)
loss 364.15118408203125
Current State,action,reward,Response time,Next State:  (9, 10.655373370049301) 3 20.0 980.873751654 (10.624473674922116, 10)
loss 366.211669921875
Current State,action,reward,Response time,Next State:  (10, 10.624473674922116) 3 19.0 941.225969064 (10.771376986314287, 11)
loss 364.5309143066406
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 2 19.0 937.284736847 (10.924797168745895, 11)
loss 367.46630859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 363.6318054199219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 365.43817138671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 364.1625061035156
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 2 19.0 984.787563682 (11.819721938468785, 11)
loss 364.16656494140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 365.9196472167969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 363.43988037109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 367.3433532714844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 366.71942138671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 370.2787170410156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 367.05230712890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 365.8378601074219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 368.3114929199219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 365.9128723144531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 363.6251525878906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 367.0345458984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 365.60479736328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 363.5777587890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 364.3966369628906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 365.1026611328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 364.2969055175781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 366.26361083984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 368.1282958984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 366.8150329589844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 363.4539794921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 363.9679870605469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 366.4547424316406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 364.92828369140625
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 0 21.0 1385.62570908 (19.08360399753829, 9)
loss 366.9059753417969
Current State,action,reward,Response time,Next State:  (9, 19.08360399753829) 3 20.0 1422.05003169 (18.668181536495972, 10)
loss 370.1596374511719
Current State,action,reward,Response time,Next State:  (10, 18.668181536495972) 3 19.0 1367.89714889 (18.375894992990247, 11)
loss 365.5347595214844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 363.57672119140625
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 1 20.0 1310.13203606 (17.229782241685768, 10)
loss 366.153564453125
Current State,action,reward,Response time,Next State:  (10, 17.229782241685768) 3 19.0 1291.59856437 (16.84211602880065, 11)
loss 364.2886962890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 366.64678955078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 364.95904541015625
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 1 20.0 1210.97093797 (15.828704162850809, 10)
loss 363.6590576171875
Current State,action,reward,Response time,Next State:  (10, 15.828704162850809) 3 19.0 1217.27964986 (15.550833128512703, 11)
loss 365.7917175292969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 367.8863220214844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 366.4732666015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 366.18768310546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 364.402587890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 366.71160888671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 366.3074951171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 362.61328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 367.5399169921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 364.9562683105469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 366.4194030761719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 365.5726623535156
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 1 20.0 1221.34827555 (16.229253414601111, 10)
loss 367.2883605957031
Current State,action,reward,Response time,Next State:  (10, 16.229253414601111) 3 19.0 1238.52642122 (16.295120821876548, 11)
loss 366.4871520996094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 365.8786315917969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 365.30010986328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 365.854248046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 365.0398254394531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 365.4884338378906
############ Running episode number: 154  ##############
Action +2 not possible so Scaled up by 1
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 4 19.0 1029.06659875 (11.973514343585284, 11)
loss 364.6826171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 363.9701843261719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 369.5726013183594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 364.68560791015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 364.64703369140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 366.7296447753906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 368.8421630859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 365.9346923828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 363.5018615722656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 364.77734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 366.3123474121094
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 4 19.0 939.812260006 (10.768325938188134, 11)
loss 367.27520751953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 366.3123474121094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 365.6263122558594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 364.0345153808594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 369.31536865234375
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 1 20.0 925.738299342 (10.553846649940214, 10)
loss 363.4698181152344
Current State,action,reward,Response time,Next State:  (10, 10.553846649940214) 3 19.0 937.479622653 (10.489125480251131, 11)
loss 363.8313903808594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 363.62493896484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 370.0234069824219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 365.69873046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 363.9295654296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 363.1026611328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 364.62823486328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 365.552490234375
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 4 19.0 914.701547126 (10.319026962956018, 11)
loss 364.3938903808594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 364.6168518066406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 366.845458984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 367.65277099609375
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 1 20.0 910.69972028 (10.335411397720526, 10)
loss 364.7621154785156
Current State,action,reward,Response time,Next State:  (10, 10.335411397720526) 3 19.0 925.892923039 (10.305649118067803, 11)
loss 368.2060852050781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 368.38690185546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 366.472412109375
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 1 20.0 911.133954163 (10.236991269871366, 10)
loss 369.552734375
Current State,action,reward,Response time,Next State:  (10, 10.236991269871366) 3 19.0 920.672316722 (10.236272697871373, 11)
loss 365.0156555175781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 364.8935546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 365.8180236816406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 367.7352294921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 367.7651062011719
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 4 19.0 917.140709305 (10.425974763084863, 11)
loss 366.0724182128906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 367.0316162109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 368.8691711425781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 368.80902099609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 363.5188293457031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 366.1871643066406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 364.6987609863281
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 1 20.0 951.466016946 (11.271571944085663, 10)
loss 364.81207275390625
Current State,action,reward,Response time,Next State:  (10, 11.271571944085663) 0 22.0 975.550709187 (11.670334358779868, 8)
loss 366.9602355957031
Current State,action,reward,Response time,Next State:  (8, 11.670334358779868) 3 21.0 1053.11377918 (11.819721938468785, 9)
loss 364.1972961425781
Current State,action,reward,Response time,Next State:  (9, 11.819721938468785) 3 20.0 1041.82165315 (12.19918626616789, 10)
loss 367.6195068359375
Current State,action,reward,Response time,Next State:  (10, 12.19918626616789) 3 19.0 1024.75516863 (12.501496275411796, 11)
loss 366.1937255859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 364.4072570800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 364.1976623535156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 368.69287109375
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 2 19.0 1122.88439768 (14.677479537099185, 11)
loss 364.41961669921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 368.21575927734375
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 2 19.0 1179.43847566 (15.836943704090487, 11)
loss 363.489990234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 363.10406494140625
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 0 21.0 1238.24711194 (16.871606159345866, 9)
loss 363.0828552246094
Current State,action,reward,Response time,Next State:  (9, 16.871606159345866) 3 20.0 1306.26286107 (17.534967586021782, 10)
loss 366.2060546875
Current State,action,reward,Response time,Next State:  (10, 17.534967586021782) 3 19.0 1307.78684385 (17.669285735563751, 11)
loss 366.39617919921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 364.4007568359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 366.73773193359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 364.931396484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 366.086181640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 368.5487976074219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 363.7825927734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 365.6962890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 367.09521484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 366.42095947265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 364.6123962402344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 365.4958190917969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 367.2560119628906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 366.72247314453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 366.8290100097656
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 2 19.0 1339.12370397 (17.82724819986867, 11)
loss 364.87530517578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 364.623291015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 367.6990661621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 363.66876220703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 364.83843994140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 365.7803649902344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 366.9048767089844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 364.8797912597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 364.2843322753906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 363.9584045410156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 367.53204345703125
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 1 20.0 1204.59090422 (15.892373986997768, 10)
loss 367.4551086425781
Action +2 not possible so Scaled up by 1
Current State,action,reward,Response time,Next State:  (10, 15.892373986997768) 4 19.0 1220.65695786 (15.954793861767499, 11)
loss 365.35662841796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 367.48248291015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 367.1373291015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 367.4183044433594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 366.6227722167969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 362.7286682128906
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 4 19.0 1221.34827555 (16.229253414601111, 11)
loss 365.19940185546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 363.2945251464844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 362.0158996582031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 362.69195556640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 364.3919982910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 363.5484924316406
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 2 19.0 1269.21706044 (17.215992726625572, 11)
loss 363.6953430175781
############ Running episode number: 155  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 367.3372497558594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 366.3009948730469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 363.2149963378906
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 0 21.0 982.049698353 (11.469111876584304, 9)
loss 362.8821716308594
Current State,action,reward,Response time,Next State:  (9, 11.469111876584304) 3 20.0 1023.46894667 (11.336751742492702, 10)
loss 365.5555725097656
Current State,action,reward,Response time,Next State:  (10, 11.336751742492702) 3 19.0 979.00811241 (11.25610796929319, 11)
loss 362.9882507324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 364.5409851074219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 363.82977294921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 368.9568176269531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 363.7148742675781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 362.6407470703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 364.85888671875
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 4 19.0 937.12351295 (10.772009508959538, 11)
loss 367.0898132324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 363.123291015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 362.10760498046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 362.9950866699219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 363.1759033203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 368.2647705078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 366.4480285644531
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 0 21.0 920.244245637 (10.433149880183072, 9)
loss 363.2164001464844
Current State,action,reward,Response time,Next State:  (9, 10.433149880183072) 3 20.0 969.241448633 (10.44185150623065, 10)
loss 364.1602478027344
Current State,action,reward,Response time,Next State:  (10, 10.44185150623065) 3 19.0 931.538941947 (10.370942817486826, 11)
loss 368.2048034667969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 363.6124267578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 365.8605651855469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 363.9634704589844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 365.60931396484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 367.4241638183594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 369.11639404296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 364.95977783203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 365.2268371582031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 365.0990295410156
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 2 19.0 912.67468196 (10.24826025489064, 11)
loss 365.8055419921875
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 4 19.0 909.642131904 (10.276491935146446, 11)
loss 363.7645568847656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 366.44873046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 366.0168762207031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 364.2283020019531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 364.1192321777344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 366.8364562988281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 364.870849609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 363.8247985839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 366.4660339355469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 363.9958190917969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 367.0226745605469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 364.5203857421875
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 0 21.0 937.284736847 (10.924797168745895, 9)
loss 365.85955810546875
Current State,action,reward,Response time,Next State:  (9, 10.924797168745895) 3 20.0 994.97675791 (11.039747673816453, 10)
loss 362.1366882324219
Current State,action,reward,Response time,Next State:  (10, 11.039747673816453) 3 19.0 963.253801267 (11.271571944085663, 11)
loss 369.25592041015625
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 4 19.0 963.716106332 (11.670334358779868, 11)
loss 363.8241882324219
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 1 20.0 984.787563682 (11.819721938468785, 10)
loss 366.9893493652344
Current State,action,reward,Response time,Next State:  (10, 11.819721938468785) 3 19.0 1004.62682792 (12.19918626616789, 11)
loss 364.85205078125
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 1 20.0 1012.73322757 (12.501496275411796, 10)
loss 366.2721252441406
Action +2 not possible so Scaled up by 1
Current State,action,reward,Response time,Next State:  (10, 12.501496275411796) 4 19.0 1040.79092857 (13.168618569876575, 11)
loss 363.53985595703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 370.0786437988281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 361.7030944824219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 360.5943603515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 366.89715576171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 364.98779296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 367.95123291015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 365.79376220703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 363.25
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 366.1272888183594
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 4 19.0 1301.78496219 (17.944480812078613, 11)
loss 363.6853332519531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 365.919921875
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 2 19.0 1339.64749699 (18.671267839956315, 11)
loss 364.51904296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 365.4840393066406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 369.0086364746094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 365.1146240234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 364.66558837890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 362.9165344238281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 363.9166564941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 363.8221435546875
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 4 19.0 1383.93777195 (19.25591252280865, 11)
loss 367.3647155761719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 361.5794982910156
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 1 20.0 1376.52055872 (18.668181536495972, 10)
loss 368.1286315917969
Current State,action,reward,Response time,Next State:  (10, 18.668181536495972) 3 19.0 1367.89714889 (18.375894992990247, 11)
loss 365.9786682128906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 362.3580017089844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 365.4413146972656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 363.61712646484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 366.177001953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 365.3350524902344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 364.0789794921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 367.374755859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 367.5056457519531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 363.2806396484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 366.24700927734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 363.0552978515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 363.5346984863281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 363.0983581542969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 365.57513427734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 366.6787109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 365.32781982421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 368.5282287597656
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 1 20.0 1219.63501821 (16.147078378791146, 10)
loss 368.8901062011719
Current State,action,reward,Response time,Next State:  (10, 16.147078378791146) 3 19.0 1234.16752106 (16.229253414601111, 11)
loss 361.98779296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 366.8385314941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 364.4194030761719
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 4 19.0 1248.87152463 (16.836383524612351, 11)
loss 361.08636474609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 364.34771728515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 364.19207763671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 367.7290344238281
############ Running episode number: 156  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 363.87982177734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 370.57666015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 362.7054748535156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 367.7806396484375
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 1 20.0 974.154538113 (11.336751742492702, 10)
loss 367.5966796875
Current State,action,reward,Response time,Next State:  (10, 11.336751742492702) 3 19.0 979.00811241 (11.25610796929319, 11)
loss 369.8077697753906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 368.60858154296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 363.0330810546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 365.7129821777344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 363.3411865234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 366.8576354980469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 364.65679931640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 365.9184265136719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 363.6391906738281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 363.2725524902344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 362.8896484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 364.2635192871094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 363.0340881347656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 364.4039306640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 365.35205078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 363.6856994628906
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 1 20.0 919.871906942 (10.370942817486826, 10)
loss 364.759033203125
Current State,action,reward,Response time,Next State:  (10, 10.370942817486826) 3 19.0 927.777654938 (10.42733414151318, 11)
loss 363.39312744140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 365.2333679199219
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 0 21.0 917.051082408 (10.344006106602812, 9)
loss 363.77105712890625
Current State,action,reward,Response time,Next State:  (9, 10.344006106602812) 3 20.0 964.575212011 (10.319026962956018, 10)
loss 367.0868835449219
Current State,action,reward,Response time,Next State:  (10, 10.319026962956018) 3 19.0 925.023825574 (10.30224719189987, 11)
loss 366.966552734375
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 0 21.0 912.494916918 (10.278181486298042, 9)
loss 362.012939453125
Current State,action,reward,Response time,Next State:  (9, 10.278181486298042) 3 20.0 961.129617982 (10.268274366284802, 10)
loss 363.5140075683594
Current State,action,reward,Response time,Next State:  (10, 10.268274366284802) 3 19.0 922.331700166 (10.335411397720526, 11)
loss 365.9197082519531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 362.7944030761719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 364.2240295410156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 365.5681457519531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 365.5713195800781
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 0 21.0 909.046654676 (10.236272697871373, 9)
loss 366.333984375
Current State,action,reward,Response time,Next State:  (9, 10.236272697871373) 3 20.0 958.935899728 (10.369891240151098, 10)
loss 365.18499755859375
Action +2 not possible so Scaled up by 1
Current State,action,reward,Response time,Next State:  (10, 10.369891240151098) 4 19.0 927.721874973 (10.316955310454549, 11)
loss 365.4894104003906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 364.37664794921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 365.741455078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 362.7286376953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 367.1093444824219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 365.18780517578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 362.6184387207031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 364.28582763671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 364.74298095703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 364.3136291503906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 365.6077880859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 363.59405517578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 361.4269104003906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 365.18798828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 365.1524963378906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 362.8562316894531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 363.5481262207031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 364.44903564453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 364.02960205078125
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 0 21.0 1143.69153516 (15.353965082180355, 9)
loss 363.22760009765625
Current State,action,reward,Response time,Next State:  (9, 15.353965082180355) 3 20.0 1226.82184023 (15.836943704090487, 10)
loss 364.8487548828125
Current State,action,reward,Response time,Next State:  (10, 15.836943704090487) 1 21.0 1217.71670884 (16.466876895473597, 9)
loss 367.3880920410156
Current State,action,reward,Response time,Next State:  (9, 16.466876895473597) 3 20.0 1285.07728144 (16.871606159345866, 10)
loss 363.2044982910156
Action +2 not possible so Scaled up by 1
Current State,action,reward,Response time,Next State:  (10, 16.871606159345866) 4 19.0 1272.5994393 (17.534967586021782, 11)
loss 364.5602111816406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 363.3876647949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 364.9653625488281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 364.7330627441406
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 1 20.0 1339.64749699 (18.671267839956315, 10)
loss 367.3240051269531
Action +2 not possible so Scaled up by 1
Current State,action,reward,Response time,Next State:  (10, 18.671267839956315) 4 19.0 1368.06085906 (19.02839494033929, 11)
loss 363.34307861328125
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 1 20.0 1373.60319427 (19.286321916040979, 10)
loss 364.3003845214844
Current State,action,reward,Response time,Next State:  (10, 19.286321916040979) 3 19.0 1400.68584406 (19.340464848017284, 11)
loss 361.85125732421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 363.9762268066406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 364.782958984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 366.270751953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 368.17950439453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 363.1849365234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 366.23211669921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 367.4610290527344
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 2 19.0 1354.56874896 (18.375894992990247, 11)
loss 363.3095703125
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 2 19.0 1339.12370397 (17.82724819986867, 11)
loss 363.6030578613281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 362.162841796875
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 0 21.0 1278.56065924 (16.84211602880065, 9)
loss 362.3707275390625
Current State,action,reward,Response time,Next State:  (9, 16.84211602880065) 3 20.0 1304.71919827 (16.237094554670044, 10)
loss 364.46435546875
Action +2 not possible so Scaled up by 1
Current State,action,reward,Response time,Next State:  (10, 16.237094554670044) 4 19.0 1238.94234737 (15.950694610794756, 11)
loss 365.98638916015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 366.2528381347656
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 0 21.0 1204.52470225 (15.550833128512703, 9)
loss 365.8341369628906
Current State,action,reward,Response time,Next State:  (9, 15.550833128512703) 3 20.0 1237.12691092 (15.446694946204717, 10)
loss 368.7685546875
Current State,action,reward,Response time,Next State:  (10, 15.446694946204717) 3 19.0 1197.01631782 (15.750501603468638, 11)
loss 366.3520812988281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 366.02044677734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 364.2456359863281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 361.8642883300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 364.1285095214844
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 0 21.0 1211.18755114 (16.004586266677634, 9)
loss 364.9168395996094
Current State,action,reward,Response time,Next State:  (9, 16.004586266677634) 3 20.0 1260.87864843 (16.017694914042416, 10)
loss 364.3883972167969
Current State,action,reward,Response time,Next State:  (10, 16.017694914042416) 3 19.0 1227.30449265 (15.947547279389703, 11)
loss 365.6554870605469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 363.78009033203125
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 0 21.0 1219.63501821 (16.147078378791146, 9)
loss 366.33251953125
Current State,action,reward,Response time,Next State:  (9, 16.147078378791146) 3 20.0 1268.3374073 (16.229253414601111, 10)
loss 363.6270446777344
Current State,action,reward,Response time,Next State:  (10, 16.229253414601111) 3 19.0 1238.52642122 (16.295120821876548, 11)
loss 365.14508056640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 368.0150146484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 364.34466552734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 363.9536437988281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 365.0450744628906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 365.9679260253906
############ Running episode number: 157  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 365.901123046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 363.4684143066406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 364.1437072753906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 364.31146240234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 365.1856384277344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 367.1632385253906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 366.482666015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 368.63720703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 363.825439453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 364.20849609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 364.7522888183594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 363.4304504394531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 365.4831237792969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 368.2731018066406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 366.3404235839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 366.37567138671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 364.3447570800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 365.40478515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 362.60174560546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 364.8516845703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 363.3763732910156
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 1 20.0 919.871906942 (10.370942817486826, 10)
loss 361.4411926269531
Current State,action,reward,Response time,Next State:  (10, 10.370942817486826) 3 19.0 927.777654938 (10.42733414151318, 11)
loss 365.9850158691406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 363.8031921386719
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 1 20.0 917.051082408 (10.344006106602812, 10)
loss 364.6382751464844
Current State,action,reward,Response time,Next State:  (10, 10.344006106602812) 3 19.0 926.348821567 (10.319026962956018, 11)
loss 363.3815002441406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 364.5461120605469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 368.1501770019531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 363.3720397949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 365.5743408203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 365.231689453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 368.3517150878906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 364.05181884765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 361.63824462890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 365.5002136230469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 361.2943420410156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 363.8421936035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 361.6664733886719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 362.7259521484375
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 2 19.0 917.140709305 (10.425974763084863, 11)
loss 362.57147216796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 363.5807189941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 361.8042907714844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 363.4481201171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 362.44683837890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 363.1825256347656
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 1 20.0 945.391786838 (11.039747673816453, 10)
loss 364.8233947753906
Current State,action,reward,Response time,Next State:  (10, 11.039747673816453) 3 19.0 963.253801267 (11.271571944085663, 11)
loss 365.3402099609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 366.14886474609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 365.92437744140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 361.905517578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 366.0252380371094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 368.7822265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 362.7222595214844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 361.832763671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 366.21148681640625
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 2 19.0 1143.69153516 (15.353965082180355, 11)
loss 362.9421081542969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 365.75335693359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 366.5196228027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 363.5021667480469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 363.1355285644531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 365.2395324707031
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 1 20.0 1301.78496219 (17.944480812078613, 10)
loss 364.6473388671875
Current State,action,reward,Response time,Next State:  (10, 17.944480812078613) 3 19.0 1329.50910109 (18.385807405229915, 11)
loss 368.7846374511719
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 0 21.0 1339.64749699 (18.671267839956315, 9)
loss 360.54766845703125
Current State,action,reward,Response time,Next State:  (9, 18.671267839956315) 3 20.0 1400.46626871 (19.02839494033929, 10)
loss 364.9324035644531
Current State,action,reward,Response time,Next State:  (10, 19.02839494033929) 3 19.0 1387.00434183 (19.286321916040979, 11)
loss 366.2314758300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 365.4192810058594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 366.367431640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 363.4137268066406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 363.03704833984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 366.9107360839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 362.6446533203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 367.9580993652344
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 4 19.0 1376.52055872 (18.668181536495972, 11)
loss 364.6553955078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 362.1415100097656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 367.8877258300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 364.78082275390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 367.59759521484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 366.36492919921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 367.28277587890625
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 2 19.0 1210.97093797 (15.828704162850809, 11)
loss 364.97467041015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 364.85833740234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 367.46600341796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 365.1653747558594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 366.2165222167969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 365.21539306640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 365.5269470214844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 362.69769287109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 365.1991271972656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 362.6149597167969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 366.7583923339844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 364.0654602050781
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 2 19.0 1219.63501821 (16.147078378791146, 11)
loss 371.31317138671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 365.016845703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 364.2146301269531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 364.77301025390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 362.6109313964844
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 1 20.0 1257.77263112 (16.845818065953559, 10)
loss 365.81573486328125
Current State,action,reward,Response time,Next State:  (10, 16.845818065953559) 3 19.0 1271.23153331 (17.052961248403161, 11)
loss 367.9824523925781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 366.9883117675781
############ Running episode number: 158  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 364.89691162109375
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 0 21.0 1000.80824137 (11.786394321941378, 9)
loss 363.8260498046875
Current State,action,reward,Response time,Next State:  (9, 11.786394321941378) 3 20.0 1040.0771169 (11.61852219546234, 10)
loss 364.6656188964844
Current State,action,reward,Response time,Next State:  (10, 11.61852219546234) 3 19.0 993.95437024 (11.469111876584304, 11)
loss 363.8347473144531
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 0 21.0 974.154538113 (11.336751742492702, 9)
loss 366.2643127441406
Current State,action,reward,Response time,Next State:  (9, 11.336751742492702) 3 20.0 1016.54054685 (11.25610796929319, 10)
loss 361.6956481933594
Current State,action,reward,Response time,Next State:  (10, 11.25610796929319) 3 19.0 974.730436685 (11.027107764209074, 11)
loss 367.76904296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 362.87799072265625
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 4 19.0 949.137050055 (10.931193889570471, 11)
loss 363.36505126953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 368.680419921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 365.6795959472656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 365.940185546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 364.7769470214844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 364.3984680175781
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 2 19.0 930.602776506 (10.58735855349979, 11)
loss 363.4810485839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 366.0650939941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 366.29083251953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 363.3818664550781
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 1 20.0 922.369964659 (10.448897752470936, 10)
loss 366.3375244140625
Current State,action,reward,Response time,Next State:  (10, 10.448897752470936) 2 20.0 931.912703681 (10.433149880183072, 10)
loss 363.3948974609375
Current State,action,reward,Response time,Next State:  (10, 10.433149880183072) 3 19.0 931.077372094 (10.44185150623065, 11)
loss 363.6474914550781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 369.22802734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 365.3952941894531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 363.2445068359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 364.3436279296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 365.326171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 365.3038024902344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 362.3908996582031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 365.35748291015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 364.6532897949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 363.98040771484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 363.63592529296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 368.0560302734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 361.87176513671875
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 2 19.0 909.046654676 (10.236272697871373, 11)
loss 363.6159973144531
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 4 19.0 909.008683798 (10.369891240151098, 11)
loss 365.9327697753906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 362.6189270019531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 362.82257080078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 366.0693664550781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 363.8037109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 362.99798583984375
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 4 19.0 925.376677007 (10.655373370049301, 11)
loss 364.685546875
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 2 19.0 931.154858096 (10.624473674922116, 11)
loss 362.7337951660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 366.2441101074219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 366.6301574707031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 363.0068054199219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 365.1702575683594
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 0 21.0 963.716106332 (11.670334358779868, 9)
loss 361.8385314941406
Current State,action,reward,Response time,Next State:  (9, 11.670334358779868) 4 19.0 1034.00195058 (11.819721938468785, 11)
loss 366.39715576171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 366.22332763671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 362.77001953125
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 1 20.0 1028.70793389 (13.168618569876575, 10)
loss 364.63818359375
Current State,action,reward,Response time,Next State:  (10, 13.168618569876575) 3 19.0 1076.17782493 (13.649658108197247, 11)
loss 364.5369567871094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 363.4192810058594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 363.2173156738281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 363.82098388671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 366.58514404296875
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 0 21.0 1204.9600972 (16.466876895473597, 9)
loss 364.4537658691406
Current State,action,reward,Response time,Next State:  (9, 16.466876895473597) 3 20.0 1285.07728144 (16.871606159345866, 10)
loss 364.8291931152344
Current State,action,reward,Response time,Next State:  (10, 16.871606159345866) 3 19.0 1272.5994393 (17.534967586021782, 11)
loss 364.6143798828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 363.9904479980469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 361.7784118652344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 362.6302795410156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 363.8095397949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 364.5636901855469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 365.7193603515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 366.5874328613281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 364.9125671386719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 364.1076354980469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 364.3631286621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 364.6821594238281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 367.7162170410156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 363.05859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 366.3468017578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 365.0187072753906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 365.390869140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 365.0006408691406
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 0 21.0 1278.56065924 (16.84211602880065, 9)
loss 364.5810546875
Current State,action,reward,Response time,Next State:  (9, 16.84211602880065) 3 20.0 1304.71919827 (16.237094554670044, 10)
loss 363.88116455078125
Current State,action,reward,Response time,Next State:  (10, 16.237094554670044) 3 19.0 1238.94234737 (15.950694610794756, 11)
loss 367.4390869140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 363.2708435058594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 361.62969970703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 362.2632751464844
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 1 20.0 1184.33851965 (15.750501603468638, 10)
loss 360.3760986328125
Current State,action,reward,Response time,Next State:  (10, 15.750501603468638) 3 19.0 1213.1314661 (15.817158911312735, 11)
loss 365.751953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 364.0115051269531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 366.3867492675781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 361.3006286621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 365.8271484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 364.3826904296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 364.9436340332031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 364.3277893066406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 365.0451354980469
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 2 19.0 1221.34827555 (16.229253414601111, 11)
loss 364.466064453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 365.38623046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 365.079345703125
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 0 21.0 1248.87152463 (16.836383524612351, 9)
loss 363.1588134765625
Current State,action,reward,Response time,Next State:  (9, 16.836383524612351) 3 20.0 1304.41912996 (16.845818065953559, 10)
loss 363.96075439453125
Current State,action,reward,Response time,Next State:  (10, 16.845818065953559) 3 19.0 1271.23153331 (17.052961248403161, 11)
loss 366.32354736328125
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 1 20.0 1269.21706044 (17.215992726625572, 10)
loss 361.8086853027344
############ Running episode number: 159  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 361.6734619140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 364.3460693359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 362.78533935546875
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 2 19.0 982.049698353 (11.469111876584304, 11)
loss 364.84246826171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 364.1609802246094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 362.9485168457031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 363.6513977050781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 365.38641357421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 364.69708251953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 368.1846923828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 364.8853454589844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 363.7824401855469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 365.4454345703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 365.9627685546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 365.4695129394531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 365.7474670410156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 365.28594970703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 363.36541748046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 362.7935485839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 362.86553955078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 363.8257751464844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 365.1713562011719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 362.9779357910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 367.31884765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 365.829833984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 367.1063537597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 363.0148620605469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 361.05401611328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 363.3677978515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 363.4441833496094
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 1 20.0 914.247384359 (10.305649118067803, 10)
loss 363.5133972167969
Current State,action,reward,Response time,Next State:  (10, 10.305649118067803) 2 20.0 924.314209939 (10.24826025489064, 10)
loss 363.8921813964844
Current State,action,reward,Response time,Next State:  (10, 10.24826025489064) 3 19.0 921.2700698 (10.276491935146446, 11)
loss 366.8547058105469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 366.7839050292969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 363.42889404296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 363.4831848144531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 364.40521240234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 363.9185485839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 364.9734191894531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 364.8450622558594
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 0 21.0 919.032945938 (10.546025383098053, 9)
loss 362.77618408203125
Current State,action,reward,Response time,Next State:  (9, 10.546025383098053) 3 20.0 975.14992417 (10.655373370049301, 10)
loss 364.1396484375
Current State,action,reward,Response time,Next State:  (10, 10.655373370049301) 3 19.0 942.865015335 (10.624473674922116, 11)
loss 363.6910400390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 364.8586120605469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 365.3909912109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 364.27886962890625
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 1 20.0 951.466016946 (11.271571944085663, 10)
loss 365.904052734375
Current State,action,reward,Response time,Next State:  (10, 11.271571944085663) 3 19.0 975.550709187 (11.670334358779868, 11)
loss 362.1860046386719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 366.63226318359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 365.0695495605469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 364.63641357421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 363.1547546386719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 367.0541687011719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 366.567138671875
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 4 19.0 1122.88439768 (14.677479537099185, 11)
loss 366.3496398925781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 365.9772644042969
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 1 20.0 1179.43847566 (15.836943704090487, 10)
loss 362.8738708496094
Current State,action,reward,Response time,Next State:  (10, 15.836943704090487) 3 19.0 1217.71670884 (16.466876895473597, 11)
loss 363.0318298339844
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 4 19.0 1238.24711194 (16.871606159345866, 11)
loss 363.697021484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 365.1942138671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 364.2092590332031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 363.8937072753906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 365.66705322265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 363.757568359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 368.2627258300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 368.49896240234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 362.439453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 363.62872314453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 361.552490234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 367.03582763671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 362.4043884277344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 364.8720397949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 364.4996643066406
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 0 21.0 1376.52055872 (18.668181536495972, 9)
loss 365.615478515625
Current State,action,reward,Response time,Next State:  (9, 18.668181536495972) 3 20.0 1400.30471596 (18.375894992990247, 10)
loss 366.1611022949219
Current State,action,reward,Response time,Next State:  (10, 18.375894992990247) 3 19.0 1352.39307459 (17.82724819986867, 11)
loss 362.786376953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 367.1390380859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 363.60052490234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 362.9777526855469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 363.2762145996094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 367.19061279296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 365.0809631347656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 363.8583679199219
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 2 19.0 1184.33851965 (15.750501603468638, 11)
loss 367.302734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 365.66455078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 366.40313720703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 365.4920959472656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 366.9482116699219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 364.765380859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 368.28802490234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 363.9480895996094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 365.77252197265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 363.6818542480469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 364.27099609375
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 2 19.0 1225.69057988 (16.295120821876548, 11)
loss 365.6641540527344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 366.9976806640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 362.4654541015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 363.4606628417969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 363.9482421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 366.9069519042969
############ Running episode number: 160  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 365.11212158203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 366.0683898925781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 364.90380859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 362.8124694824219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 363.52734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 363.8097229003906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 364.7882995605469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 368.0583801269531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 368.4615173339844
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 4 19.0 945.729803224 (10.816918347608043, 11)
loss 364.8262939453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 362.2872314453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 363.5047607421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 368.0684814453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 368.2640380859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 364.888671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 363.3790283203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 365.80267333984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 363.84552001953125
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 4 19.0 922.369964659 (10.448897752470936, 11)
loss 365.57952880859375
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 4 19.0 920.244245637 (10.433149880183072, 11)
loss 366.3541259765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 361.0561218261719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 365.7443542480469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 364.1011047363281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 362.412353515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 362.40411376953125
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 0 21.0 914.701547126 (10.319026962956018, 9)
loss 366.0939636230469
Current State,action,reward,Response time,Next State:  (9, 10.319026962956018) 3 20.0 963.267677113 (10.30224719189987, 10)
loss 365.1177978515625
Current State,action,reward,Response time,Next State:  (10, 10.30224719189987) 3 19.0 924.133757854 (10.278181486298042, 11)
loss 364.204833984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 362.8450927734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 365.4072570800781
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 1 20.0 914.247384359 (10.305649118067803, 10)
loss 368.33154296875
Current State,action,reward,Response time,Next State:  (10, 10.305649118067803) 3 19.0 924.314209939 (10.24826025489064, 11)
loss 362.52008056640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 361.3866882324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 366.1325378417969
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 4 19.0 909.046654676 (10.236272697871373, 11)
loss 365.29412841796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 365.49249267578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 365.7704772949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 364.23809814453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 365.9986877441406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 361.89337158203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 367.5771789550781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 366.6440734863281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 361.13360595703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 363.74560546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 363.0442810058594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 366.5848693847656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 363.9701232910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 364.55389404296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 362.2304992675781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 362.4360656738281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 362.9493408203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 363.4180603027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 364.9573669433594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 364.10797119140625
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 4 19.0 1122.88439768 (14.677479537099185, 11)
loss 364.08441162109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 364.3406677246094
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 1 20.0 1179.43847566 (15.836943704090487, 10)
loss 363.462890625
Current State,action,reward,Response time,Next State:  (10, 15.836943704090487) 3 19.0 1217.71670884 (16.466876895473597, 11)
loss 366.6589660644531
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 0 21.0 1238.24711194 (16.871606159345866, 9)
loss 366.6805114746094
Current State,action,reward,Response time,Next State:  (9, 16.871606159345866) 3 20.0 1306.26286107 (17.534967586021782, 10)
loss 364.2037048339844
Current State,action,reward,Response time,Next State:  (10, 17.534967586021782) 3 19.0 1307.78684385 (17.669285735563751, 11)
loss 364.7074890136719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 363.598876953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 363.09820556640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 365.3117980957031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 361.3286437988281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 363.4749755859375
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 0 21.0 1387.23260634 (19.340464848017284, 9)
loss 362.6489562988281
Current State,action,reward,Response time,Next State:  (9, 19.340464848017284) 3 20.0 1435.4954296 (19.213467265587269, 10)
loss 362.56610107421875
Current State,action,reward,Response time,Next State:  (10, 19.213467265587269) 3 19.0 1396.82133527 (19.140765783401285, 11)
loss 362.6474304199219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 366.70513916015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 364.6950378417969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 366.08026123046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 363.3743896484375
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 1 20.0 1376.52055872 (18.668181536495972, 10)
loss 366.9541320800781
Current State,action,reward,Response time,Next State:  (10, 18.668181536495972) 3 19.0 1367.89714889 (18.375894992990247, 11)
loss 362.74835205078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 364.6690979003906
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 0 21.0 1310.13203606 (17.229782241685768, 9)
loss 366.4784851074219
Current State,action,reward,Response time,Next State:  (9, 17.229782241685768) 3 20.0 1325.01161138 (16.84211602880065, 10)
loss 363.8116149902344
Current State,action,reward,Response time,Next State:  (10, 16.84211602880065) 3 19.0 1271.03516211 (16.237094554670044, 11)
loss 364.40606689453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 363.0841979980469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 366.2619934082031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 364.6323547363281
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 2 19.0 1189.84140354 (15.446694946204717, 11)
loss 365.0856628417969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 367.8769226074219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 363.3088684082031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 367.3444519042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 366.477294921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 363.4235534667969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 362.8556823730469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 368.17144775390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 365.4393005371094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 363.7999267578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 364.3175048828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 366.2405700683594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 360.8431701660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 363.2185363769531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 362.5237731933594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 364.9793395996094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 360.4469299316406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 364.4100036621094
############ Running episode number: 161  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 1 21.0 1029.06659875 (11.973514343585284, 9)
loss 365.6982421875
Current State,action,reward,Response time,Next State:  (9, 11.973514343585284) 3 20.0 1049.87192659 (11.786394321941378, 10)
loss 362.9134826660156
Current State,action,reward,Response time,Next State:  (10, 11.786394321941378) 3 19.0 1002.85899476 (11.61852219546234, 11)
loss 361.7927551269531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 364.30267333984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 364.8546142578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 362.6571350097656
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 4 19.0 962.898956888 (11.027107764209074, 11)
loss 364.3008728027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 364.8265075683594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 365.0334167480469
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 0 21.0 945.729803224 (10.816918347608043, 9)
loss 366.45465087890625
Current State,action,reward,Response time,Next State:  (9, 10.816918347608043) 3 20.0 989.329834005 (10.819208572963639, 10)
loss 366.7676086425781
Current State,action,reward,Response time,Next State:  (10, 10.819208572963639) 1 21.0 951.555504911 (10.768325938188134, 9)
loss 364.5604553222656
Current State,action,reward,Response time,Next State:  (9, 10.768325938188134) 4 19.0 986.786261176 (10.772009508959538, 11)
loss 365.0659484863281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 363.9664306640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 369.0341796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 367.8462219238281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 365.57379150390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 364.1144104003906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 365.5716857910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 365.6669616699219
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 0 21.0 919.412094444 (10.44185150623065, 9)
loss 364.54058837890625
Current State,action,reward,Response time,Next State:  (9, 10.44185150623065) 3 20.0 969.696935814 (10.370942817486826, 10)
loss 365.2549133300781
Current State,action,reward,Response time,Next State:  (10, 10.370942817486826) 3 19.0 927.777654938 (10.42733414151318, 11)
loss 363.635986328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 365.8459777832031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 365.4007873535156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 365.7460021972656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 367.8802185058594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 363.0684814453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 360.88629150390625
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 0 21.0 910.69972028 (10.335411397720526, 9)
loss 366.4889831542969
Current State,action,reward,Response time,Next State:  (9, 10.335411397720526) 3 20.0 964.125321415 (10.305649118067803, 10)
loss 363.87957763671875
Current State,action,reward,Response time,Next State:  (10, 10.305649118067803) 3 19.0 924.314209939 (10.24826025489064, 11)
loss 363.7149353027344
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 4 19.0 909.642131904 (10.276491935146446, 11)
loss 363.6805114746094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 365.69012451171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 365.5473327636719
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 4 19.0 909.008683798 (10.369891240151098, 11)
loss 363.4483642578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 364.03289794921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 364.0281066894531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 365.4499206542969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 365.1760559082031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 365.76873779296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 362.5182189941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 365.7945861816406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 368.4682312011719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 364.919921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 364.5846252441406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 365.5019226074219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 366.31304931640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 367.6996154785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 361.99725341796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 364.9023132324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 365.2367248535156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 367.0528564453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 365.2887878417969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 364.48870849609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 363.38836669921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 364.58551025390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 362.0187683105469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 366.100341796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 361.90264892578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 360.4962463378906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 364.8721008300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 365.61956787109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 364.50726318359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 362.71881103515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 364.19219970703125
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 4 19.0 1387.23260634 (19.340464848017284, 11)
loss 364.2862243652344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 364.68328857421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 363.698974609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 364.5435485839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 366.51702880859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 362.6524963378906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 363.5294189453125
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 4 19.0 1376.52055872 (18.668181536495972, 11)
loss 364.6113586425781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 363.8045959472656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 364.1656799316406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 360.76824951171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 360.18768310546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 364.98687744140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 364.9517517089844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 365.3207702636719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 366.98773193359375
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 0 21.0 1189.84140354 (15.446694946204717, 9)
loss 363.3036193847656
Current State,action,reward,Response time,Next State:  (9, 15.446694946204717) 3 20.0 1231.67579099 (15.750501603468638, 10)
loss 364.9278564453125
Current State,action,reward,Response time,Next State:  (10, 15.750501603468638) 0 22.0 1213.1314661 (15.817158911312735, 8)
loss 366.5788879394531
Current State,action,reward,Response time,Next State:  (8, 15.817158911312735) 3 21.0 1295.47409005 (15.829956988360925, 9)
loss 363.8587341308594
Current State,action,reward,Response time,Next State:  (9, 15.829956988360925) 1 22.0 1251.7376675 (15.892373986997768, 8)
loss 367.6601257324219
Current State,action,reward,Response time,Next State:  (8, 15.892373986997768) 3 21.0 1299.87001973 (15.954793861767499, 9)
loss 364.87017822265625
Current State,action,reward,Response time,Next State:  (9, 15.954793861767499) 3 20.0 1258.27226176 (16.004586266677634, 10)
loss 365.2147216796875
Current State,action,reward,Response time,Next State:  (10, 16.004586266677634) 3 19.0 1226.60915635 (16.017694914042416, 11)
loss 364.7457275390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 364.2236633300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 365.93951416015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 363.2149963378906
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 0 21.0 1221.34827555 (16.229253414601111, 9)
loss 365.72845458984375
Current State,action,reward,Response time,Next State:  (9, 16.229253414601111) 3 20.0 1272.63886489 (16.295120821876548, 10)
loss 363.7833251953125
Action +2 not possible so Scaled up by 1
Current State,action,reward,Response time,Next State:  (10, 16.295120821876548) 4 19.0 1242.02029803 (16.667936385136993, 11)
loss 363.8455810546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 366.1349792480469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 366.1499328613281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 367.1703796386719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 361.4200134277344
############ Running episode number: 162  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 366.5830383300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 364.2240905761719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 367.487548828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 365.3756103515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 365.1870422363281
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 4 19.0 967.160346038 (11.25610796929319, 11)
loss 365.58660888671875
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 4 19.0 962.898956888 (11.027107764209074, 11)
loss 364.1211853027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 364.7181396484375
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 0 21.0 949.137050055 (10.931193889570471, 9)
loss 362.691650390625
Current State,action,reward,Response time,Next State:  (9, 10.931193889570471) 3 20.0 995.311594677 (10.816918347608043, 10)
loss 366.11383056640625
Current State,action,reward,Response time,Next State:  (10, 10.816918347608043) 3 19.0 951.434021987 (10.819208572963639, 11)
loss 366.8192443847656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 364.68365478515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 367.5758056640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 365.1807556152344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 367.4924011230469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 364.529296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 368.0370788574219
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 4 19.0 925.789969445 (10.489125480251131, 11)
loss 366.35919189453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 365.3326721191406
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 2 19.0 920.244245637 (10.433149880183072, 11)
loss 364.36431884765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 369.5372314453125
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 2 19.0 919.871906942 (10.370942817486826, 11)
loss 365.4449462890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 362.95098876953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 366.7672119140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 362.9635009765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 365.2128601074219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 366.1216735839844
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 4 19.0 912.494916918 (10.278181486298042, 11)
loss 366.2196350097656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 362.9943542480469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 363.53515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 364.28643798828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 366.64849853515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 364.7069091796875
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 0 21.0 911.133954163 (10.236991269871366, 9)
loss 365.127685546875
Current State,action,reward,Response time,Next State:  (9, 10.236991269871366) 2 21.0 958.973513426 (10.236272697871373, 9)
loss 363.1011962890625
Current State,action,reward,Response time,Next State:  (9, 10.236272697871373) 3 20.0 958.935899728 (10.369891240151098, 10)
loss 363.3028564453125
Current State,action,reward,Response time,Next State:  (10, 10.369891240151098) 3 19.0 927.721874973 (10.316955310454549, 11)
loss 365.9945373535156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 366.46661376953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 362.30352783203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 364.4542236328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 365.66473388671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 364.8194274902344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 366.8367004394531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 365.5550231933594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 365.26885986328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 365.4178771972656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 362.4804992675781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 366.942138671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 365.1757507324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 365.4872131347656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 362.1747131347656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 364.8337707519531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 364.8401184082031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 367.06475830078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 367.7900085449219
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 0 21.0 1143.69153516 (15.353965082180355, 9)
loss 363.5810546875
Current State,action,reward,Response time,Next State:  (9, 15.353965082180355) 3 20.0 1226.82184023 (15.836943704090487, 10)
loss 366.15020751953125
Current State,action,reward,Response time,Next State:  (10, 15.836943704090487) 3 19.0 1217.71670884 (16.466876895473597, 11)
loss 367.5353088378906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 362.47674560546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 363.3586120605469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 364.7887268066406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 367.98980712890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 364.0284729003906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 363.06280517578125
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 1 20.0 1354.73183582 (19.02839494033929, 10)
loss 364.7899169921875
Current State,action,reward,Response time,Next State:  (10, 19.02839494033929) 3 19.0 1387.00434183 (19.286321916040979, 11)
loss 365.9981689453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 366.6199035644531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 365.2316589355469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 362.15478515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 363.1044616699219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 365.45989990234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 364.80718994140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 365.23388671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 365.64459228515625
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 4 19.0 1354.56874896 (18.375894992990247, 11)
loss 367.30474853515625
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 2 19.0 1339.12370397 (17.82724819986867, 11)
loss 364.2280578613281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 366.7766418457031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 363.68023681640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 364.05718994140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 361.063232421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 367.9442443847656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 365.7650146484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 363.156982421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 363.27044677734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 366.9281005859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 362.5181579589844
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 2 19.0 1204.59090422 (15.892373986997768, 11)
loss 366.3338928222656
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 1 20.0 1207.88915169 (15.954793861767499, 10)
loss 363.253173828125
Current State,action,reward,Response time,Next State:  (10, 15.954793861767499) 3 19.0 1223.96796344 (16.004586266677634, 11)
loss 363.3007507324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 365.02239990234375
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 0 21.0 1214.51137704 (15.947547279389703, 9)
loss 365.5617980957031
Current State,action,reward,Response time,Next State:  (9, 15.947547279389703) 3 20.0 1257.89293893 (16.11465619633363, 10)
loss 362.7312316894531
Current State,action,reward,Response time,Next State:  (10, 16.11465619633363) 3 19.0 1232.44771583 (16.147078378791146, 11)
loss 364.9708557128906
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 1 20.0 1221.34827555 (16.229253414601111, 10)
loss 366.1527099609375
Current State,action,reward,Response time,Next State:  (10, 16.229253414601111) 2 20.0 1238.52642122 (16.295120821876548, 10)
loss 362.5658874511719
Current State,action,reward,Response time,Next State:  (10, 16.295120821876548) 3 19.0 1242.02029803 (16.667936385136993, 11)
loss 365.7142639160156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 365.3819580078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 367.77386474609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 366.3662414550781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 365.4275817871094
############ Running episode number: 163  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 367.5956726074219
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 2 19.0 1000.80824137 (11.786394321941378, 11)
loss 368.0375061035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 367.20306396484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 362.2622985839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 362.542724609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 367.7726135253906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 364.39727783203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 363.0474853515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 364.25604248046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 365.40496826171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 366.86114501953125
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 0 21.0 939.812260006 (10.768325938188134, 9)
loss 362.6482849121094
Current State,action,reward,Response time,Next State:  (9, 10.768325938188134) 0 23.0 986.786261176 (10.772009508959538, 7)
loss 364.56365966796875
Current State,action,reward,Response time,Next State:  (7, 10.772009508959538) 3 22.0 1047.1055689 (10.644925616761762, 8)
loss 368.9792175292969
Current State,action,reward,Response time,Next State:  (8, 10.644925616761762) 3 21.0 993.183975462 (10.58735855349979, 9)
loss 366.2981262207031
Current State,action,reward,Response time,Next State:  (9, 10.58735855349979) 4 19.0 977.313511661 (10.552868829802469, 11)
loss 367.20574951171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 367.4048156738281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 365.458251953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 362.02728271484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 366.7942199707031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 364.99786376953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 368.16082763671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 365.7527160644531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 366.8869323730469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 369.22259521484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 362.51898193359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 370.861328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 366.2197570800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 365.78863525390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 365.2156982421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 366.18646240234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 366.08038330078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 366.84967041015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 364.49383544921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 367.9657287597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 364.8948669433594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 365.9573669433594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 364.77264404296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 366.3271179199219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 363.3545227050781
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 1 20.0 919.032945938 (10.546025383098053, 10)
loss 368.0256042480469
Current State,action,reward,Response time,Next State:  (10, 10.546025383098053) 3 19.0 937.064750655 (10.655373370049301, 11)
loss 363.00634765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 364.491943359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 366.20404052734375
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 1 20.0 937.284736847 (10.924797168745895, 10)
loss 361.02508544921875
Current State,action,reward,Response time,Next State:  (10, 10.924797168745895) 3 19.0 957.1563561 (11.039747673816453, 11)
loss 368.5379943847656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 365.8825988769531
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 4 19.0 963.716106332 (11.670334358779868, 11)
loss 364.3460998535156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 369.73175048828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 365.4193420410156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 363.63665771484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 366.91949462890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 364.8623352050781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 367.31085205078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 365.64617919921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 364.6391906738281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 361.697509765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 366.6507873535156
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 2 19.0 1238.24711194 (16.871606159345866, 11)
loss 365.96826171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 364.4192810058594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 363.34967041015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 365.53594970703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 364.0576477050781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 364.35760498046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 364.870849609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 364.6346740722656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 363.7035827636719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 363.31097412109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 368.26666259765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 366.14404296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 364.86651611328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 366.4350280761719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 365.3668518066406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 368.0838928222656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 364.28253173828125
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 0 21.0 1339.12370397 (17.82724819986867, 9)
loss 367.2584533691406
Current State,action,reward,Response time,Next State:  (9, 17.82724819986867) 3 20.0 1356.28600579 (17.229782241685768, 10)
loss 363.9271240234375
Current State,action,reward,Response time,Next State:  (10, 17.229782241685768) 3 19.0 1291.59856437 (16.84211602880065, 11)
loss 364.47760009765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 366.4529724121094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 364.780029296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 369.0441589355469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 362.8629150390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 365.38079833984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 366.9283447265625
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 2 19.0 1200.39231205 (15.817158911312735, 11)
loss 367.0074157714844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 362.1306457519531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 365.82708740234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 363.9882507324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 366.6199951171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 368.54205322265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 364.0634765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 365.1911315917969
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 0 21.0 1219.63501821 (16.147078378791146, 9)
loss 366.57391357421875
Current State,action,reward,Response time,Next State:  (9, 16.147078378791146) 3 20.0 1268.3374073 (16.229253414601111, 10)
loss 366.39239501953125
Current State,action,reward,Response time,Next State:  (10, 16.229253414601111) 3 19.0 1238.52642122 (16.295120821876548, 11)
loss 365.6231689453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 365.0061950683594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 363.6704406738281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 362.5888977050781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 366.9415283203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 367.0798034667969
############ Running episode number: 164  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 361.18341064453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 368.4129333496094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 368.393798828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 364.947509765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 365.1075134277344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 366.4700622558594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 365.8185119628906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 364.656982421875
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 0 21.0 949.137050055 (10.931193889570471, 9)
loss 367.3445129394531
Current State,action,reward,Response time,Next State:  (9, 10.931193889570471) 3 20.0 995.311594677 (10.816918347608043, 10)
loss 366.3483581542969
Current State,action,reward,Response time,Next State:  (10, 10.816918347608043) 1 21.0 951.434021987 (10.819208572963639, 9)
loss 366.02099609375
Current State,action,reward,Response time,Next State:  (9, 10.819208572963639) 3 20.0 989.449716 (10.768325938188134, 10)
loss 368.53155517578125
Current State,action,reward,Response time,Next State:  (10, 10.768325938188134) 3 19.0 948.856481751 (10.772009508959538, 11)
loss 367.7192077636719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 361.3860778808594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 364.5278015136719
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 0 21.0 927.560809977 (10.552868829802469, 9)
loss 366.91461181640625
Current State,action,reward,Response time,Next State:  (9, 10.552868829802469) 3 20.0 975.508144832 (10.553846649940214, 10)
loss 363.41802978515625
Current State,action,reward,Response time,Next State:  (10, 10.553846649940214) 3 19.0 937.479622653 (10.489125480251131, 11)
loss 365.3788146972656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 367.1893310546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 366.3592834472656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 368.8260498046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 364.2526550292969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 366.1691589355469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 367.2881774902344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 364.0839538574219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 366.5653991699219
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 4 19.0 913.381595845 (10.30224719189987, 11)
loss 363.8888854980469
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 0 21.0 912.494916918 (10.278181486298042, 9)
loss 368.54327392578125
Current State,action,reward,Response time,Next State:  (9, 10.278181486298042) 0 23.0 961.129617982 (10.268274366284802, 7)
loss 364.7178649902344
Current State,action,reward,Response time,Next State:  (7, 10.268274366284802) 3 22.0 1015.71444152 (10.335411397720526, 8)
loss 365.247802734375
Current State,action,reward,Response time,Next State:  (8, 10.335411397720526) 3 21.0 975.09448038 (10.305649118067803, 9)
loss 370.48004150390625
Current State,action,reward,Response time,Next State:  (9, 10.305649118067803) 3 20.0 962.567412952 (10.24826025489064, 10)
loss 368.0486755371094
Current State,action,reward,Response time,Next State:  (10, 10.24826025489064) 3 19.0 921.2700698 (10.276491935146446, 11)
loss 366.30413818359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 367.57861328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 363.0946960449219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 366.5455322265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 363.7811584472656
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 1 20.0 913.272125304 (10.333617326102203, 10)
loss 365.2311706542969
Current State,action,reward,Response time,Next State:  (10, 10.333617326102203) 3 19.0 925.797758139 (10.390165524255663, 11)
loss 363.8287658691406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 363.43804931640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 363.4562683105469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 365.4607849121094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 364.1125183105469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 364.1097106933594
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 1 20.0 937.284736847 (10.924797168745895, 10)
loss 366.4373474121094
Current State,action,reward,Response time,Next State:  (10, 10.924797168745895) 3 19.0 957.1563561 (11.039747673816453, 11)
loss 367.1532897949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 361.06689453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 367.58074951171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 366.37310791015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 364.67071533203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 370.89691162109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 366.7247619628906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 367.43402099609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 365.6704406738281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 369.1970520019531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 366.3716125488281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 363.40240478515625
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 1 20.0 1204.9600972 (16.466876895473597, 10)
loss 367.61541748046875
Current State,action,reward,Response time,Next State:  (10, 16.466876895473597) 2 20.0 1251.130943 (16.871606159345866, 10)
loss 368.6333312988281
Current State,action,reward,Response time,Next State:  (10, 16.871606159345866) 3 19.0 1272.5994393 (17.534967586021782, 11)
loss 362.78668212890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 366.4795227050781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 363.7239074707031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 367.31048583984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 367.7649841308594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 364.0935974121094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 365.4871520996094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 366.4996643066406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 368.5578308105469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 365.9506530761719
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 0 21.0 1379.54110953 (19.385636054792762, 9)
loss 364.5405578613281
Current State,action,reward,Response time,Next State:  (9, 19.385636054792762) 0 23.0 1437.85991935 (19.223969507401588, 7)
loss 363.9599304199219
Current State,action,reward,Response time,Next State:  (7, 19.223969507401588) 3 22.0 1573.80408654 (19.25591252280865, 8)
loss 363.9963073730469
Current State,action,reward,Response time,Next State:  (8, 19.25591252280865) 2 22.0 1496.45133993 (19.08360399753829, 8)
loss 366.3827819824219
Current State,action,reward,Response time,Next State:  (8, 19.08360399753829) 0 24.0 1486.3808035 (18.668181536495972, 6)
loss 361.7815246582031
Current State,action,reward,Response time,Next State:  (6, 18.668181536495972) 3 23.0 1632.09570747 (18.375894992990247, 7)
loss 368.11053466796875
Current State,action,reward,Response time,Next State:  (7, 18.375894992990247) 3 22.0 1520.95485516 (17.82724819986867, 8)
loss 370.2207946777344
Current State,action,reward,Response time,Next State:  (8, 17.82724819986867) 1 23.0 1412.95334646 (17.229782241685768, 7)
loss 364.70184326171875
Current State,action,reward,Response time,Next State:  (7, 17.229782241685768) 3 22.0 1449.53285514 (16.84211602880065, 8)
loss 370.01947021484375
Current State,action,reward,Response time,Next State:  (8, 16.84211602880065) 3 21.0 1355.37749867 (16.237094554670044, 9)
loss 370.376953125
Current State,action,reward,Response time,Next State:  (9, 16.237094554670044) 3 20.0 1273.04930988 (15.950694610794756, 10)
loss 363.78948974609375
Current State,action,reward,Response time,Next State:  (10, 15.950694610794756) 3 19.0 1223.7505224 (15.828704162850809, 11)
loss 363.52398681640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 364.1851806640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 371.69891357421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 365.63018798828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 365.025146484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 366.4371032714844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 370.7555236816406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 372.390380859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 367.7543029785156
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 2 19.0 1213.81868812 (16.017694914042416, 11)
loss 368.6120300292969
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 4 19.0 1214.51137704 (15.947547279389703, 11)
loss 364.8622131347656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 367.51885986328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 367.0047912597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 367.3064880371094
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 4 19.0 1225.69057988 (16.295120821876548, 11)
loss 372.1891784667969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 368.0929260253906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 369.4105224609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 367.6272888183594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 370.0989074707031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 366.9002380371094
############ Running episode number: 165  ##############
Action +2 not possible so Scaled up by 1
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 4 19.0 1029.06659875 (11.973514343585284, 11)
loss 363.88287353515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 370.0350341796875
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 2 19.0 990.920419923 (11.61852219546234, 11)
loss 371.2442321777344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 365.9779052734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 368.698974609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 369.7749328613281
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 0 21.0 962.898956888 (11.027107764209074, 9)
loss 370.04119873046875
Current State,action,reward,Response time,Next State:  (9, 11.027107764209074) 3 20.0 1000.33221268 (10.995673623987257, 10)
loss 366.7834777832031
Current State,action,reward,Response time,Next State:  (10, 10.995673623987257) 3 19.0 960.915933313 (10.931193889570471, 11)
loss 367.0332946777344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 368.43243408203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 367.1907653808594
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 1 20.0 939.812260006 (10.768325938188134, 10)
loss 369.37115478515625
Current State,action,reward,Response time,Next State:  (10, 10.768325938188134) 3 19.0 948.856481751 (10.772009508959538, 11)
loss 367.56634521484375
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 1 20.0 937.318160694 (10.644925616761762, 10)
loss 364.9844665527344
Current State,action,reward,Response time,Next State:  (10, 10.644925616761762) 3 19.0 942.310823749 (10.58735855349979, 11)
loss 367.0466613769531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 368.767578125
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 2 19.0 925.738299342 (10.553846649940214, 11)
loss 371.718994140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 366.1266784667969
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 4 19.0 922.369964659 (10.448897752470936, 11)
loss 371.574951171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 367.30816650390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 367.4870910644531
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 2 19.0 919.871906942 (10.370942817486826, 11)
loss 369.2514953613281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 366.7183837890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 369.90411376953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 366.22064208984375
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 1 20.0 914.701547126 (10.319026962956018, 10)
loss 366.9039001464844
Current State,action,reward,Response time,Next State:  (10, 10.319026962956018) 3 19.0 925.023825574 (10.30224719189987, 11)
loss 368.20330810546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 366.4288330078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 367.3487854003906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 366.6075134277344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 370.70782470703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 366.811279296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 369.2084045410156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 364.7892761230469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 365.29779052734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 364.06884765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 368.5398864746094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 364.5881042480469
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 2 19.0 914.152581784 (10.390165524255663, 11)
loss 368.92388916015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 369.5676574707031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 371.64385986328125
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 0 21.0 925.376677007 (10.655373370049301, 9)
loss 367.4383544921875
Current State,action,reward,Response time,Next State:  (9, 10.655373370049301) 3 20.0 980.873751654 (10.624473674922116, 10)
loss 368.4207763671875
Current State,action,reward,Response time,Next State:  (10, 10.624473674922116) 3 19.0 941.225969064 (10.771376986314287, 11)
loss 367.9875183105469
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 2 19.0 937.284736847 (10.924797168745895, 11)
loss 368.83502197265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 364.8682861328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 367.8648986816406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 371.814453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 365.2758483886719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 366.1016540527344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 365.80322265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 371.70068359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 365.61016845703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 367.5952453613281
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 4 19.0 1122.88439768 (14.677479537099185, 11)
loss 364.8196105957031
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 2 19.0 1143.69153516 (15.353965082180355, 11)
loss 364.9179992675781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 362.3305969238281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 366.0010986328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 367.80767822265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 365.6090087890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 363.9021301269531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 367.2137145996094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 371.00677490234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 370.7560729980469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 367.4751892089844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 371.94769287109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 366.0412902832031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 367.72283935546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 370.62451171875
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 4 19.0 1379.54110953 (19.385636054792762, 11)
loss 365.6035461425781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 367.9586181640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 367.0050964355469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 368.7740478515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 364.9427185058594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 371.0942077636719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 369.9892272949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 363.8399658203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 365.14532470703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 366.96392822265625
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 0 21.0 1226.10492247 (15.950694610794756, 9)
loss 368.3966979980469
Current State,action,reward,Response time,Next State:  (9, 15.950694610794756) 3 20.0 1258.0576862 (15.828704162850809, 10)
loss 369.9609069824219
Current State,action,reward,Response time,Next State:  (10, 15.828704162850809) 3 19.0 1217.27964986 (15.550833128512703, 11)
loss 373.6700744628906
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 1 20.0 1189.84140354 (15.446694946204717, 10)
loss 366.1085205078125
Current State,action,reward,Response time,Next State:  (10, 15.446694946204717) 3 19.0 1197.01631782 (15.750501603468638, 11)
loss 366.0574035644531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 368.8239440917969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 367.1527404785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 365.439208984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 368.00994873046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 368.3421936035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 367.2221374511719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 363.4966735839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 364.48974609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 364.87664794921875
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 1 20.0 1221.34827555 (16.229253414601111, 10)
loss 370.9315490722656
Current State,action,reward,Response time,Next State:  (10, 16.229253414601111) 2 20.0 1238.52642122 (16.295120821876548, 10)
loss 366.11346435546875
Current State,action,reward,Response time,Next State:  (10, 16.295120821876548) 3 19.0 1242.02029803 (16.667936385136993, 11)
loss 368.6190185546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 366.05615234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 369.7089538574219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 367.8020935058594
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 4 19.0 1269.21706044 (17.215992726625572, 11)
loss 366.4108581542969
############ Running episode number: 166  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 367.13616943359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 368.4216613769531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 374.95501708984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 364.6770935058594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 365.7082214355469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 364.7901306152344
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 0 21.0 962.898956888 (11.027107764209074, 9)
loss 365.2387390136719
Current State,action,reward,Response time,Next State:  (9, 11.027107764209074) 3 20.0 1000.33221268 (10.995673623987257, 10)
loss 366.2658996582031
Current State,action,reward,Response time,Next State:  (10, 10.995673623987257) 1 21.0 960.915933313 (10.931193889570471, 9)
loss 368.1928405761719
Current State,action,reward,Response time,Next State:  (9, 10.931193889570471) 3 20.0 995.311594677 (10.816918347608043, 10)
loss 365.9122009277344
Current State,action,reward,Response time,Next State:  (10, 10.816918347608043) 3 19.0 951.434021987 (10.819208572963639, 11)
loss 363.1217956542969
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 2 19.0 939.812260006 (10.768325938188134, 11)
loss 363.212890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 368.42529296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 368.1767272949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 367.9770202636719
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 2 19.0 927.560809977 (10.552868829802469, 11)
loss 366.12689208984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 366.6864929199219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 370.2877197265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 368.00830078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 368.6204528808594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 365.0071716308594
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 1 20.0 919.871906942 (10.370942817486826, 10)
loss 367.4099426269531
Current State,action,reward,Response time,Next State:  (10, 10.370942817486826) 3 19.0 927.777654938 (10.42733414151318, 11)
loss 364.0196533203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 369.03076171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 362.64739990234375
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 4 19.0 914.701547126 (10.319026962956018, 11)
loss 367.529052734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 368.2948303222656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 366.67138671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 369.15521240234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 373.567626953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 368.6831970214844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 365.5506286621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 369.8807373046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 368.4888610839844
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 2 19.0 909.046654676 (10.236272697871373, 11)
loss 364.8218078613281
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 0 21.0 909.008683798 (10.369891240151098, 9)
loss 367.2437744140625
Current State,action,reward,Response time,Next State:  (9, 10.369891240151098) 3 20.0 965.930171009 (10.316955310454549, 10)
loss 368.9374084472656
Current State,action,reward,Response time,Next State:  (10, 10.316955310454549) 3 19.0 924.913936648 (10.333617326102203, 11)
loss 372.7658996582031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 366.222412109375
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 1 20.0 917.140709305 (10.425974763084863, 10)
loss 367.59814453125
Action +2 not possible so Scaled up by 1
Current State,action,reward,Response time,Next State:  (10, 10.425974763084863) 4 19.0 930.696774523 (10.546025383098053, 11)
loss 366.5289611816406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 367.91632080078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 367.35137939453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 369.78924560546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 369.6839904785156
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 4 19.0 945.391786838 (11.039747673816453, 11)
loss 368.1701965332031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 368.1444091796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 364.81158447265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 364.33477783203125
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 1 20.0 992.681522335 (12.19918626616789, 10)
loss 363.93853759765625
Current State,action,reward,Response time,Next State:  (10, 12.19918626616789) 0 22.0 1024.75516863 (12.501496275411796, 8)
loss 365.87799072265625
Current State,action,reward,Response time,Next State:  (8, 12.501496275411796) 1 23.0 1101.69086701 (13.168618569876575, 7)
loss 369.0487976074219
Current State,action,reward,Response time,Next State:  (7, 13.168618569876575) 3 22.0 1196.45441106 (13.649658108197247, 8)
loss 367.4433898925781
Current State,action,reward,Response time,Next State:  (8, 13.649658108197247) 3 21.0 1168.79494995 (14.283719188889453, 9)
loss 367.53082275390625
Current State,action,reward,Response time,Next State:  (9, 14.283719188889453) 3 20.0 1170.79974938 (14.677479537099185, 10)
loss 366.60113525390625
Current State,action,reward,Response time,Next State:  (10, 14.677479537099185) 3 19.0 1156.21398489 (15.353965082180355, 11)
loss 364.5908508300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 371.7744445800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 364.7296447753906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 367.7216796875
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 0 21.0 1259.63387034 (17.534967586021782, 9)
loss 369.5235595703125
Current State,action,reward,Response time,Next State:  (9, 17.534967586021782) 3 20.0 1340.98655806 (17.669285735563751, 10)
loss 372.2416076660156
Current State,action,reward,Response time,Next State:  (10, 17.669285735563751) 3 19.0 1314.91162813 (17.944480812078613, 11)
loss 367.7675476074219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 369.8096008300781
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 1 20.0 1339.64749699 (18.671267839956315, 10)
loss 365.91864013671875
Current State,action,reward,Response time,Next State:  (10, 18.671267839956315) 3 19.0 1368.06085906 (19.02839494033929, 11)
loss 367.8778991699219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 369.98773193359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 367.7731018066406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 367.0027160644531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 367.4249572753906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 371.6363525390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 368.414794921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 369.0447082519531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 369.4431457519531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 365.8887634277344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 366.0428771972656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 372.8271179199219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 370.744140625
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 4 19.0 1278.56065924 (16.84211602880065, 11)
loss 362.7593688964844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 367.5567321777344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 369.4273681640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 371.4015808105469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 365.149658203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 370.298583984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 366.4389953613281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 367.18408203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 368.4867858886719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 364.3200988769531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 366.5067138671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 366.38836669921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 365.5224304199219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 367.72772216796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 366.35150146484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 369.1293640136719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 370.0223083496094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 365.6186828613281
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 1 20.0 1229.17115431 (16.667936385136993, 10)
loss 370.84814453125
Current State,action,reward,Response time,Next State:  (10, 16.667936385136993) 3 19.0 1261.79596106 (16.836383524612351, 11)
loss 368.0480651855469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 369.5545349121094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 371.0596923828125
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 4 19.0 1269.21706044 (17.215992726625572, 11)
loss 365.37738037109375
############ Running episode number: 167  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 366.3792419433594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 365.82952880859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 366.39898681640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 370.4088134765625
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 2 19.0 974.154538113 (11.336751742492702, 11)
loss 370.81964111328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 365.5424499511719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 364.7438049316406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 364.63177490234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 365.0381774902344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 365.4137268066406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 371.3017272949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 370.5799865722656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 366.6632995605469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 368.00213623046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 363.4996032714844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 368.56884765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 366.548828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 364.6653137207031
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 1 20.0 922.369964659 (10.448897752470936, 10)
loss 369.05859375
Current State,action,reward,Response time,Next State:  (10, 10.448897752470936) 3 19.0 931.912703681 (10.433149880183072, 11)
loss 367.0945739746094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 367.38336181640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 369.47027587890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 364.4628601074219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 364.77593994140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 365.3029479980469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 368.7165832519531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 364.7880859375
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 0 21.0 912.494916918 (10.278181486298042, 9)
loss 369.3767395019531
Current State,action,reward,Response time,Next State:  (9, 10.278181486298042) 3 20.0 961.129617982 (10.268274366284802, 10)
loss 371.46673583984375
Current State,action,reward,Response time,Next State:  (10, 10.268274366284802) 1 21.0 922.331700166 (10.335411397720526, 9)
loss 368.2122497558594
Current State,action,reward,Response time,Next State:  (9, 10.335411397720526) 3 20.0 964.125321415 (10.305649118067803, 10)
loss 368.7583923339844
Current State,action,reward,Response time,Next State:  (10, 10.305649118067803) 3 19.0 924.314209939 (10.24826025489064, 11)
loss 362.04217529296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 364.5801086425781
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 1 20.0 911.133954163 (10.236991269871366, 10)
loss 366.4744873046875
Current State,action,reward,Response time,Next State:  (10, 10.236991269871366) 3 19.0 920.672316722 (10.236272697871373, 11)
loss 364.9660339355469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 366.1251220703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 364.62774658203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 372.08489990234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 370.447021484375
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 4 19.0 917.140709305 (10.425974763084863, 11)
loss 366.3775329589844
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 0 21.0 919.032945938 (10.546025383098053, 9)
loss 367.4507141113281
Current State,action,reward,Response time,Next State:  (9, 10.546025383098053) 4 19.0 975.14992417 (10.655373370049301, 11)
loss 368.7511291503906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 369.19720458984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 365.0539855957031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 368.30731201171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 369.0640563964844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 372.7411193847656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 366.17022705078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 366.9194030761719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 367.114013671875
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 1 20.0 1012.73322757 (12.501496275411796, 10)
loss 368.74029541015625
Current State,action,reward,Response time,Next State:  (10, 12.501496275411796) 3 19.0 1040.79092857 (13.168618569876575, 11)
loss 368.0538330078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 367.4993591308594
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 1 20.0 1089.37925646 (14.283719188889453, 10)
loss 368.88739013671875
Current State,action,reward,Response time,Next State:  (10, 14.283719188889453) 3 19.0 1135.32732476 (14.677479537099185, 11)
loss 367.09185791015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 368.2677917480469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 364.4186096191406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 364.09881591796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 369.28472900390625
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 2 19.0 1259.63387034 (17.534967586021782, 11)
loss 371.1900939941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 369.2431335449219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 367.2941589355469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 365.6322326660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 365.2225036621094
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 2 19.0 1354.73183582 (19.02839494033929, 11)
loss 367.23492431640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 370.9028015136719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 367.7582702636719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 365.0904541015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 369.41729736328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 370.25152587890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 373.8437805175781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 371.1523132324219
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 0 21.0 1385.62570908 (19.08360399753829, 9)
loss 365.3072509765625
Current State,action,reward,Response time,Next State:  (9, 19.08360399753829) 3 20.0 1422.05003169 (18.668181536495972, 10)
loss 370.22479248046875
Current State,action,reward,Response time,Next State:  (10, 18.668181536495972) 3 19.0 1367.89714889 (18.375894992990247, 11)
loss 364.5158996582031
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 1 20.0 1339.12370397 (17.82724819986867, 10)
loss 369.2003173828125
Action +2 not possible so Scaled up by 1
Current State,action,reward,Response time,Next State:  (10, 17.82724819986867) 4 19.0 1323.29060362 (17.229782241685768, 11)
loss 366.29376220703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 369.9918518066406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 368.08648681640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 368.37628173828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 365.2403259277344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 365.912353515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 371.16510009765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 367.11090087890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 367.82281494140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 369.0157775878906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 367.8748474121094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 367.4775085449219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 366.28082275390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 367.2604675292969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 366.0439758300781
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 0 21.0 1210.80462626 (16.11465619633363, 9)
loss 366.6286926269531
Current State,action,reward,Response time,Next State:  (9, 16.11465619633363) 3 20.0 1266.64026605 (16.147078378791146, 10)
loss 366.6513671875
Current State,action,reward,Response time,Next State:  (10, 16.147078378791146) 3 19.0 1234.16752106 (16.229253414601111, 11)
loss 366.8350524902344
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 1 20.0 1225.69057988 (16.295120821876548, 10)
loss 365.9170227050781
Current State,action,reward,Response time,Next State:  (10, 16.295120821876548) 3 19.0 1242.02029803 (16.667936385136993, 11)
loss 369.87945556640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 369.9068603515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 367.6473693847656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 368.445556640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 369.1537780761719
############ Running episode number: 168  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 365.5927734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 363.4364929199219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 367.8743896484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 369.5380554199219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 371.4636535644531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 366.3096618652344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 368.4435729980469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 372.5711669921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 365.8633728027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 374.60650634765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 365.8070373535156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 365.8595886230469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 366.1950378417969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 370.2862854003906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 371.0198974609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 365.7010803222656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 364.8364562988281
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 1 20.0 925.789969445 (10.489125480251131, 10)
loss 370.59564208984375
Current State,action,reward,Response time,Next State:  (10, 10.489125480251131) 3 19.0 934.046546974 (10.448897752470936, 11)
loss 365.8371887207031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 364.2674560546875
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 2 19.0 919.412094444 (10.44185150623065, 11)
loss 369.396728515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 371.6044616699219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 369.241455078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 366.0809326171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 368.53985595703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 363.6292724609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 367.7544860839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 367.9853820800781
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 2 19.0 911.223233653 (10.268274366284802, 11)
loss 367.8077392578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 368.0009765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 367.14324951171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 369.58123779296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 366.2820739746094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 368.8439025878906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 367.33837890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 365.4703063964844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 364.44464111328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 370.9967041015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 366.74847412109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 367.7939758300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 370.2261657714844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 362.68231201171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 367.5730895996094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 370.0849914550781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 373.3789367675781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 368.49481201171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 366.13720703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 367.54339599609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 363.5712890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 371.4626159667969
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 4 19.0 1012.73322757 (12.501496275411796, 11)
loss 367.1924743652344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 365.38653564453125
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 0 21.0 1063.96010023 (13.649658108197247, 9)
loss 368.893310546875
Current State,action,reward,Response time,Next State:  (9, 13.649658108197247) 3 20.0 1137.6097809 (14.283719188889453, 10)
loss 367.3496398925781
Current State,action,reward,Response time,Next State:  (10, 14.283719188889453) 3 19.0 1135.32732476 (14.677479537099185, 11)
loss 366.7187194824219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 368.6301574707031
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 1 20.0 1179.43847566 (15.836943704090487, 10)
loss 373.0617370605469
Current State,action,reward,Response time,Next State:  (10, 15.836943704090487) 0 22.0 1217.71670884 (16.466876895473597, 8)
loss 367.9402770996094
Current State,action,reward,Response time,Next State:  (8, 16.466876895473597) 3 21.0 1333.44672445 (16.871606159345866, 9)
loss 367.9261474609375
Current State,action,reward,Response time,Next State:  (9, 16.871606159345866) 3 20.0 1306.26286107 (17.534967586021782, 10)
loss 367.6955871582031
Current State,action,reward,Response time,Next State:  (10, 17.534967586021782) 3 19.0 1307.78684385 (17.669285735563751, 11)
loss 366.3004455566406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 368.4643249511719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 366.6415710449219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 372.77325439453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 371.626708984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 365.814453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 364.27252197265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 367.2270812988281
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 0 21.0 1383.38281107 (19.140765783401285, 9)
loss 362.3016662597656
Current State,action,reward,Response time,Next State:  (9, 19.140765783401285) 1 22.0 1425.04216908 (19.385636054792762, 8)
loss 370.46820068359375
Current State,action,reward,Response time,Next State:  (8, 19.385636054792762) 2 22.0 1504.03300517 (19.223969507401588, 8)
loss 365.2286682128906
Current State,action,reward,Response time,Next State:  (8, 19.223969507401588) 3 21.0 1494.58443695 (19.25591252280865, 9)
loss 369.3441162109375
Current State,action,reward,Response time,Next State:  (9, 19.25591252280865) 3 20.0 1431.06953264 (19.08360399753829, 10)
loss 369.61419677734375
Current State,action,reward,Response time,Next State:  (10, 19.08360399753829) 3 19.0 1389.93285614 (18.668181536495972, 11)
loss 365.73443603515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 370.345458984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 369.2031555175781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 364.506591796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 366.12823486328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 369.96905517578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 366.9580078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 370.4006042480469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 370.63079833984375
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 4 19.0 1189.84140354 (15.446694946204717, 11)
loss 368.58306884765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 369.1271667480469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 367.25421142578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 369.10015869140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 369.6991882324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 364.3249816894531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 366.4050598144531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 369.34326171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 367.4423828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 366.1482238769531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 364.5274963378906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 364.1177673339844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 364.3013916015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 369.11773681640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 368.1403503417969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 367.3624572753906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 370.06719970703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 368.84393310546875
############ Running episode number: 169  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 368.0933532714844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 363.9253234863281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 371.09527587890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 369.7083435058594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 364.22265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 363.05511474609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 364.9049377441406
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 1 20.0 950.798097136 (10.995673623987257, 10)
loss 364.69989013671875
Current State,action,reward,Response time,Next State:  (10, 10.995673623987257) 3 19.0 960.915933313 (10.931193889570471, 11)
loss 369.673095703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 365.4377136230469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 369.4056091308594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 362.7855529785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 369.7018737792969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 368.42138671875
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 4 19.0 930.602776506 (10.58735855349979, 11)
loss 368.7575378417969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 367.32342529296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 368.2107238769531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 367.9854431152344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 366.4127502441406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 366.8913269042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 367.8164367675781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 365.4549255371094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 372.8096618652344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 367.5391845703125
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 0 21.0 917.051082408 (10.344006106602812, 9)
loss 369.0142822265625
Current State,action,reward,Response time,Next State:  (9, 10.344006106602812) 3 20.0 964.575212011 (10.319026962956018, 10)
loss 370.99029541015625
Current State,action,reward,Response time,Next State:  (10, 10.319026962956018) 1 21.0 925.023825574 (10.30224719189987, 9)
loss 371.0623474121094
Current State,action,reward,Response time,Next State:  (9, 10.30224719189987) 0 23.0 962.389338906 (10.278181486298042, 7)
loss 366.33905029296875
Current State,action,reward,Response time,Next State:  (7, 10.278181486298042) 3 22.0 1016.33182085 (10.268274366284802, 8)
loss 366.8716735839844
Current State,action,reward,Response time,Next State:  (8, 10.268274366284802) 3 21.0 971.170670341 (10.335411397720526, 9)
loss 370.2150573730469
Current State,action,reward,Response time,Next State:  (9, 10.335411397720526) 3 20.0 964.125321415 (10.305649118067803, 10)
loss 367.4170227050781
Current State,action,reward,Response time,Next State:  (10, 10.305649118067803) 3 19.0 924.314209939 (10.24826025489064, 11)
loss 369.1001281738281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 372.8741760253906
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 0 21.0 911.133954163 (10.236991269871366, 9)
loss 370.651611328125
Current State,action,reward,Response time,Next State:  (9, 10.236991269871366) 3 20.0 958.973513426 (10.236272697871373, 10)
loss 367.66436767578125
Current State,action,reward,Response time,Next State:  (10, 10.236272697871373) 3 19.0 920.634200723 (10.369891240151098, 11)
loss 367.2989501953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 362.7725524902344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 369.489013671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 365.634521484375
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 1 20.0 917.140709305 (10.425974763084863, 10)
loss 365.7515563964844
Current State,action,reward,Response time,Next State:  (10, 10.425974763084863) 3 19.0 930.696774523 (10.546025383098053, 11)
loss 368.5593566894531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 371.96221923828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 374.3563537597656
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 1 20.0 929.522052234 (10.771376986314287, 10)
loss 365.22802734375
Current State,action,reward,Response time,Next State:  (10, 10.771376986314287) 1 21.0 949.018321829 (10.924797168745895, 9)
loss 369.0491027832031
Current State,action,reward,Response time,Next State:  (9, 10.924797168745895) 3 20.0 994.97675791 (11.039747673816453, 10)
loss 368.15863037109375
Current State,action,reward,Response time,Next State:  (10, 11.039747673816453) 3 19.0 963.253801267 (11.271571944085663, 11)
loss 366.6031799316406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 371.0668029785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 363.8596496582031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 364.6520690917969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 371.3516540527344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 369.46099853515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 367.4044494628906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 364.20050048828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 367.1283264160156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 369.15411376953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 364.29132080078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 366.2244873046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 370.0843200683594
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 4 19.0 1259.63387034 (17.534967586021782, 11)
loss 367.97674560546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 367.11651611328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 370.3075256347656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 368.12493896484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 372.8570556640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 370.2751159667969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 368.4396057128906
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 4 19.0 1387.23260634 (19.340464848017284, 11)
loss 374.38250732421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 371.1170349121094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 369.98687744140625
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 0 21.0 1379.54110953 (19.385636054792762, 9)
loss 368.7872009277344
Current State,action,reward,Response time,Next State:  (9, 19.385636054792762) 3 20.0 1437.85991935 (19.223969507401588, 10)
loss 368.3711853027344
Current State,action,reward,Response time,Next State:  (10, 19.223969507401588) 3 19.0 1397.37841716 (19.25591252280865, 11)
loss 367.1873474121094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 371.53350830078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 369.20098876953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 368.28778076171875
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 1 20.0 1339.12370397 (17.82724819986867, 10)
loss 365.2412414550781
Current State,action,reward,Response time,Next State:  (10, 17.82724819986867) 3 19.0 1323.29060362 (17.229782241685768, 11)
loss 365.84368896484375
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 2 19.0 1278.56065924 (16.84211602880065, 11)
loss 363.9219055175781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 367.4234924316406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 364.36163330078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 368.8367919921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 369.2999267578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 368.0299072265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 369.0315856933594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 363.65850830078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 366.7719421386719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 368.8038635253906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 371.8329772949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 366.216064453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 369.1869812011719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 365.5998840332031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 363.8797912597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 370.4683532714844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 368.64862060546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 368.6389465332031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 367.57843017578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 369.6446228027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 368.07110595703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 370.23822021484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 369.3557434082031
############ Running episode number: 170  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 368.6801452636719
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 4 19.0 1000.80824137 (11.786394321941378, 11)
loss 368.7946472167969
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 1 20.0 990.920419923 (11.61852219546234, 10)
loss 371.74835205078125
Current State,action,reward,Response time,Next State:  (10, 11.61852219546234) 2 20.0 993.95437024 (11.469111876584304, 10)
loss 371.4233703613281
Current State,action,reward,Response time,Next State:  (10, 11.469111876584304) 3 19.0 986.02903554 (11.336751742492702, 11)
loss 372.55047607421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 369.53546142578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 369.4945068359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 365.3894958496094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 368.1634826660156
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 2 19.0 945.729803224 (10.816918347608043, 11)
loss 366.7763977050781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 373.17327880859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 368.0897521972656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 375.0169982910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 369.1148376464844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 368.3122863769531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 365.6965026855469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 379.114013671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 373.2398986816406
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 4 19.0 922.369964659 (10.448897752470936, 11)
loss 368.0643005371094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 366.6271057128906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 370.1787109375
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 2 19.0 919.871906942 (10.370942817486826, 11)
loss 372.5323791503906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 368.1281433105469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 366.97412109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 368.576416015625
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 4 19.0 914.701547126 (10.319026962956018, 11)
loss 369.9999084472656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 370.85638427734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 368.143310546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 369.40625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 367.14727783203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 368.0922546386719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 366.4709167480469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 370.4238586425781
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 4 19.0 911.133954163 (10.236991269871366, 11)
loss 366.2816162109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 372.1600341796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 368.7413330078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 368.8446350097656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 366.5482482910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 366.464599609375
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 1 20.0 917.140709305 (10.425974763084863, 10)
loss 366.6963806152344
Current State,action,reward,Response time,Next State:  (10, 10.425974763084863) 3 19.0 930.696774523 (10.546025383098053, 11)
loss 364.6758728027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 368.7402648925781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 362.9822692871094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 369.5728759765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 369.25140380859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 366.2545166015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 364.24969482421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 371.69671630859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 367.8316650390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 369.5794372558594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 368.2423095703125
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 0 21.0 1028.70793389 (13.168618569876575, 9)
loss 367.6389465332031
Current State,action,reward,Response time,Next State:  (9, 13.168618569876575) 3 20.0 1112.429735 (13.649658108197247, 10)
loss 368.9042053222656
Current State,action,reward,Response time,Next State:  (10, 13.649658108197247) 3 19.0 1101.69413046 (14.283719188889453, 11)
loss 367.99163818359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 373.30218505859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 371.16864013671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 368.02056884765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 374.5450134277344
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 2 19.0 1238.24711194 (16.871606159345866, 11)
loss 371.25531005859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 373.50177001953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 366.5986022949219
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 1 20.0 1301.78496219 (17.944480812078613, 10)
loss 365.3539123535156
Current State,action,reward,Response time,Next State:  (10, 17.944480812078613) 1 21.0 1329.50910109 (18.385807405229915, 9)
loss 370.3623352050781
Current State,action,reward,Response time,Next State:  (9, 18.385807405229915) 3 20.0 1385.5238237 (18.671267839956315, 10)
loss 365.2171936035156
Action +2 not possible so Scaled up by 1
Current State,action,reward,Response time,Next State:  (10, 18.671267839956315) 4 19.0 1368.06085906 (19.02839494033929, 11)
loss 367.7639465332031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 364.64599609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 372.5340270996094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 369.121826171875
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 2 19.0 1383.38281107 (19.140765783401285, 11)
loss 366.43670654296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 365.43505859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 369.4923095703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 369.93115234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 365.629150390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 367.07977294921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 370.9171142578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 369.4542236328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 366.6433410644531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 364.4378356933594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 366.84423828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 368.41015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 369.873046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 369.7187805175781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 366.9710693359375
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 2 19.0 1184.33851965 (15.750501603468638, 11)
loss 368.75030517578125
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 0 21.0 1200.39231205 (15.817158911312735, 9)
loss 369.5209655761719
Current State,action,reward,Response time,Next State:  (9, 15.817158911312735) 3 20.0 1251.06775133 (15.829956988360925, 10)
loss 365.6722717285156
Current State,action,reward,Response time,Next State:  (10, 15.829956988360925) 3 19.0 1217.34610485 (15.892373986997768, 11)
loss 371.0212097167969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 365.1871643066406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 369.8850402832031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 371.5993957519531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 364.2083740234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 365.76934814453125
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 1 20.0 1219.63501821 (16.147078378791146, 10)
loss 372.1915283203125
Current State,action,reward,Response time,Next State:  (10, 16.147078378791146) 3 19.0 1234.16752106 (16.229253414601111, 11)
loss 370.3008728027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 371.2750244140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 369.96923828125
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 4 19.0 1248.87152463 (16.836383524612351, 11)
loss 369.6184387207031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 370.2319030761719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 364.9897155761719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 370.3778381347656
############ Running episode number: 171  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 367.3414001464844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 372.1237487792969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 366.6407470703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 368.2666015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 367.0763854980469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 367.6502380371094
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 2 19.0 962.898956888 (11.027107764209074, 11)
loss 366.3169250488281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 368.9327392578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 376.9421691894531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 369.4332580566406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 373.22308349609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 367.90960693359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 367.5247802734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 366.744873046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 369.47772216796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 366.6142883300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 368.6358947753906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 366.5636901855469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 365.9974670410156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 367.0719299316406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 368.7580261230469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 367.94281005859375
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 2 19.0 916.124940439 (10.42733414151318, 11)
loss 367.0947265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 365.0689392089844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 370.3534240722656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 369.64007568359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 366.75653076171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 369.1252746582031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 364.30517578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 367.7273864746094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 368.9538879394531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 369.9895935058594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 368.5379943847656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 373.3087158203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 369.8653564453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 369.25054931640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 364.1971740722656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 366.9247131347656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 370.5408935546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 370.4974060058594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 371.54644775390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 368.5194091796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 368.3686828613281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 367.2936706542969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 370.5280456542969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 365.0463562011719
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 0 21.0 951.466016946 (11.271571944085663, 9)
loss 370.5429382324219
Current State,action,reward,Response time,Next State:  (9, 11.271571944085663) 3 20.0 1013.12870607 (11.670334358779868, 10)
loss 369.22222900390625
Current State,action,reward,Response time,Next State:  (10, 11.670334358779868) 3 19.0 996.702699398 (11.819721938468785, 11)
loss 371.08856201171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 365.7034912109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 370.23394775390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 366.81622314453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 370.96771240234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 368.50048828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 368.2959899902344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 369.6211853027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 366.7705383300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 368.3185729980469
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 4 19.0 1238.24711194 (16.871606159345866, 11)
loss 367.14752197265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 367.2344970703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 365.8468017578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 368.15460205078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 371.7019958496094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 372.7626953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 362.3941955566406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 370.24212646484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 365.6430358886719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 369.13031005859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 367.2605285644531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 364.443115234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 369.4861145019531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 364.6935119628906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 370.9129943847656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 369.0711364746094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 370.3947448730469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 366.2366638183594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 372.7822265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 367.4331970214844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 368.4791564941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 369.1842041015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 371.6595153808594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 370.6791687011719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 369.1369934082031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 368.1636962890625
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 2 19.0 1200.39231205 (15.817158911312735, 11)
loss 367.3820495605469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 365.41131591796875
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 2 19.0 1204.59090422 (15.892373986997768, 11)
loss 373.86859130859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 368.1582946777344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 365.1529235839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 367.74810791015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 368.804931640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 368.1773986816406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 368.51519775390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 366.5995178222656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 363.7871398925781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 369.0110778808594
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 0 21.0 1248.87152463 (16.836383524612351, 9)
loss 364.7247619628906
Current State,action,reward,Response time,Next State:  (9, 16.836383524612351) 3 20.0 1304.41912996 (16.845818065953559, 10)
loss 365.29986572265625
Action +2 not possible so Scaled up by 1
Current State,action,reward,Response time,Next State:  (10, 16.845818065953559) 4 19.0 1271.23153331 (17.052961248403161, 11)
loss 367.3868103027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 367.3678283691406
############ Running episode number: 172  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 365.9670104980469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 365.0252380371094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 364.70452880859375
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 1 20.0 982.049698353 (11.469111876584304, 10)
loss 365.00299072265625
Current State,action,reward,Response time,Next State:  (10, 11.469111876584304) 1 21.0 986.02903554 (11.336751742492702, 9)
loss 372.1763000488281
Current State,action,reward,Response time,Next State:  (9, 11.336751742492702) 3 20.0 1016.54054685 (11.25610796929319, 10)
loss 366.602294921875
Current State,action,reward,Response time,Next State:  (10, 11.25610796929319) 3 19.0 974.730436685 (11.027107764209074, 11)
loss 370.89556884765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 367.68994140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 370.7059020996094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 368.8082275390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 366.0421142578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 368.1947021484375
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 4 19.0 937.12351295 (10.772009508959538, 11)
loss 369.9731140136719
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 1 20.0 937.318160694 (10.644925616761762, 10)
loss 368.17547607421875
Current State,action,reward,Response time,Next State:  (10, 10.644925616761762) 3 19.0 942.310823749 (10.58735855349979, 11)
loss 371.0166015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 367.2335205078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 360.9068908691406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 364.9593200683594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 365.45538330078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 369.3650207519531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 366.17694091796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 367.5176086425781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 370.53912353515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 364.2500305175781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 370.6144104003906
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 4 19.0 914.701547126 (10.319026962956018, 11)
loss 370.4622802734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 371.0602722167969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 368.1482849121094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 364.4298095703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 369.2585144042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 372.3209533691406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 368.5615539550781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 369.71044921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 364.71063232421875
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 2 19.0 909.046654676 (10.236272697871373, 11)
loss 366.4340515136719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 365.63519287109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 367.0300598144531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 372.6614990234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 365.42816162109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 365.0235290527344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 370.1061706542969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 370.0968933105469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 369.07659912109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 369.88818359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 367.70782470703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 368.4531555175781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 365.2434997558594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 365.42242431640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 364.74591064453125
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 2 19.0 992.681522335 (12.19918626616789, 11)
loss 367.3738708496094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 368.7394714355469
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 1 20.0 1028.70793389 (13.168618569876575, 10)
loss 368.5325012207031
Current State,action,reward,Response time,Next State:  (10, 13.168618569876575) 3 19.0 1076.17782493 (13.649658108197247, 11)
loss 369.2020263671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 369.9054260253906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 365.61077880859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 367.4940185546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 362.4372253417969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 370.3851013183594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 367.096435546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 368.553955078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 365.1456298828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 367.77203369140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 367.3902893066406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 368.7721252441406
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 1 20.0 1354.73183582 (19.02839494033929, 10)
loss 366.95263671875
Current State,action,reward,Response time,Next State:  (10, 19.02839494033929) 3 19.0 1387.00434183 (19.286321916040979, 11)
loss 364.6357116699219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 368.805908203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 367.8807678222656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 366.957275390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 368.0787048339844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 370.2247314453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 368.32244873046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 369.0411682128906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 372.91888427734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 375.4750061035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 368.8096008300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 370.4054260253906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 367.1266174316406
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 4 19.0 1258.07554888 (16.237094554670044, 11)
loss 367.0149841308594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 368.3644714355469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 363.0659484863281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 367.6045837402344
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 2 19.0 1189.84140354 (15.446694946204717, 11)
loss 368.01031494140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 367.3016357421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 368.9001770019531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 366.1139221191406
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 4 19.0 1204.59090422 (15.892373986997768, 11)
loss 366.58343505859375
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 4 19.0 1207.88915169 (15.954793861767499, 11)
loss 369.8966369628906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 365.7250061035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 367.65411376953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 365.7149353027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 369.8742980957031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 367.347900390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 364.254638671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 366.9077453613281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 371.2693176269531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 369.5426940917969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 365.8959655761719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 369.078369140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 368.2646789550781
############ Running episode number: 173  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 370.76373291015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 366.8790588378906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 366.7239990234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 371.66436767578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 368.5789794921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 367.2274475097656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 370.6016540527344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 362.238525390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 363.84515380859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 369.4101867675781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 363.4317626953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 369.1297912597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 368.7951354980469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 365.72210693359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 369.1413879394531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 372.62738037109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 366.25225830078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 367.3299255371094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 366.50360107421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 364.2904052734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 363.90972900390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 363.15478515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 367.7279968261719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 367.5973815917969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 369.4152526855469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 367.5447692871094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 368.8528137207031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 369.152587890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 364.60052490234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 368.7771301269531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 371.3341064453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 367.2283630371094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 368.78607177734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 367.67828369140625
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 1 20.0 909.046654676 (10.236272697871373, 10)
loss 367.940673828125
Current State,action,reward,Response time,Next State:  (10, 10.236272697871373) 3 19.0 920.634200723 (10.369891240151098, 11)
loss 364.8638000488281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 367.77484130859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 368.2057189941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 368.7766418457031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 368.3022766113281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 367.5450744628906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 366.74884033203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 368.6346130371094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 368.6593933105469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 363.65399169921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 366.2066345214844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 366.13189697265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 364.89300537109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 373.26409912109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 366.8052062988281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 366.68634033203125
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 0 21.0 1028.70793389 (13.168618569876575, 9)
loss 365.0897216796875
Current State,action,reward,Response time,Next State:  (9, 13.168618569876575) 3 20.0 1112.429735 (13.649658108197247, 10)
loss 363.53485107421875
Current State,action,reward,Response time,Next State:  (10, 13.649658108197247) 3 19.0 1101.69413046 (14.283719188889453, 11)
loss 365.3229064941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 365.42254638671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 367.54620361328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 365.19659423828125
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 4 19.0 1204.9600972 (16.466876895473597, 11)
loss 363.6260681152344
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 1 20.0 1238.24711194 (16.871606159345866, 10)
loss 366.6762390136719
Current State,action,reward,Response time,Next State:  (10, 16.871606159345866) 0 22.0 1272.5994393 (17.534967586021782, 8)
loss 363.7541198730469
Current State,action,reward,Response time,Next State:  (8, 17.534967586021782) 3 21.0 1395.8710659 (17.669285735563751, 9)
loss 368.0088195800781
Current State,action,reward,Response time,Next State:  (9, 17.669285735563751) 3 20.0 1348.01745033 (17.944480812078613, 10)
loss 363.5938415527344
Current State,action,reward,Response time,Next State:  (10, 17.944480812078613) 3 19.0 1329.50910109 (18.385807405229915, 11)
loss 369.11163330078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 365.5458068847656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 367.8848571777344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 368.5851135253906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 367.9006652832031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 364.84100341796875
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 2 19.0 1383.38281107 (19.140765783401285, 11)
loss 366.5088806152344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 367.22052001953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 364.8968200683594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 365.4346008300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 365.32354736328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 367.3617248535156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 366.6731872558594
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 2 19.0 1339.12370397 (17.82724819986867, 11)
loss 366.23541259765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 369.2394104003906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 366.0927429199219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 367.4808654785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 367.4424133300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 365.17620849609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 370.5119934082031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 367.2317199707031
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 4 19.0 1184.33851965 (15.750501603468638, 11)
loss 367.36090087890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 370.59564208984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 367.37420654296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 364.7513122558594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 366.8524475097656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 368.9996337890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 365.8263854980469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 368.6717224121094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 367.775634765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 364.3747863769531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 366.2239685058594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 367.1880798339844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 370.9474182128906
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 0 21.0 1248.87152463 (16.836383524612351, 9)
loss 365.49713134765625
Current State,action,reward,Response time,Next State:  (9, 16.836383524612351) 3 20.0 1304.41912996 (16.845818065953559, 10)
loss 371.8033447265625
Current State,action,reward,Response time,Next State:  (10, 16.845818065953559) 3 19.0 1271.23153331 (17.052961248403161, 11)
loss 365.9931640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 365.1005554199219
############ Running episode number: 174  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 364.0840759277344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 367.6048889160156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 364.6128234863281
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 1 20.0 982.049698353 (11.469111876584304, 10)
loss 365.1432800292969
Current State,action,reward,Response time,Next State:  (10, 11.469111876584304) 1 21.0 986.02903554 (11.336751742492702, 9)
loss 368.0181579589844
Current State,action,reward,Response time,Next State:  (9, 11.336751742492702) 3 20.0 1016.54054685 (11.25610796929319, 10)
loss 367.05255126953125
Current State,action,reward,Response time,Next State:  (10, 11.25610796929319) 3 19.0 974.730436685 (11.027107764209074, 11)
loss 368.1576232910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 365.43975830078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 365.7532653808594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 372.7452697753906
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 2 19.0 939.691239608 (10.819208572963639, 11)
loss 370.9881896972656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 361.6531982421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 362.14154052734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 369.9399108886719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 362.5486145019531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 365.6316833496094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 365.7923278808594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 367.10302734375
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 2 19.0 922.369964659 (10.448897752470936, 11)
loss 370.1766662597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 370.0279846191406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 364.46014404296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 365.49041748046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 363.8321838378906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 365.72393798828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 365.1637268066406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 369.5927734375
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 1 20.0 913.381595845 (10.30224719189987, 10)
loss 365.3996887207031
Current State,action,reward,Response time,Next State:  (10, 10.30224719189987) 3 19.0 924.133757854 (10.278181486298042, 11)
loss 369.7039489746094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 367.1891174316406
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 0 21.0 910.69972028 (10.335411397720526, 9)
loss 365.4212951660156
Current State,action,reward,Response time,Next State:  (9, 10.335411397720526) 3 20.0 964.125321415 (10.305649118067803, 10)
loss 364.2427978515625
Current State,action,reward,Response time,Next State:  (10, 10.305649118067803) 3 19.0 924.314209939 (10.24826025489064, 11)
loss 364.8509216308594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 368.3852844238281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 365.7864990234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 366.11480712890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 363.9544982910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 366.549072265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 362.10223388671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 368.9947814941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 367.6371154785156
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 4 19.0 919.032945938 (10.546025383098053, 11)
loss 366.5086669921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 365.34674072265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 366.8443603515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 363.3484191894531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 363.3204345703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 367.4711608886719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 363.5726318359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 366.85186767578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 364.8962097167969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 362.9819641113281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 364.1985168457031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 365.0723571777344
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 1 20.0 1063.96010023 (13.649658108197247, 10)
loss 364.9847717285156
Current State,action,reward,Response time,Next State:  (10, 13.649658108197247) 3 19.0 1101.69413046 (14.283719188889453, 11)
loss 363.30230712890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 367.73248291015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 363.964599609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 366.72625732421875
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 1 20.0 1204.9600972 (16.466876895473597, 10)
loss 366.9490966796875
Current State,action,reward,Response time,Next State:  (10, 16.466876895473597) 3 19.0 1251.130943 (16.871606159345866, 11)
loss 366.5610656738281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 367.244873046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 363.9615173339844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 366.87451171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 364.431884765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 362.1020202636719
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 0 21.0 1354.73183582 (19.02839494033929, 9)
loss 366.9365234375
Current State,action,reward,Response time,Next State:  (9, 19.02839494033929) 0 23.0 1419.16011 (19.286321916040979, 7)
loss 366.626220703125
Current State,action,reward,Response time,Next State:  (7, 19.286321916040979) 3 22.0 1577.68968482 (19.340464848017284, 8)
loss 370.5759582519531
Current State,action,reward,Response time,Next State:  (8, 19.340464848017284) 3 21.0 1501.39298325 (19.213467265587269, 9)
loss 368.4625549316406
Current State,action,reward,Response time,Next State:  (9, 19.213467265587269) 3 20.0 1428.84773289 (19.140765783401285, 10)
loss 364.26007080078125
Current State,action,reward,Response time,Next State:  (10, 19.140765783401285) 3 19.0 1392.96495117 (19.385636054792762, 11)
loss 368.46331787109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 366.9195556640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 366.85882568359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 363.6903076171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 361.51220703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 369.9045715332031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 365.9002990722656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 365.1597595214844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 365.7663269042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 363.1344299316406
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 4 19.0 1226.10492247 (15.950694610794756, 11)
loss 365.34490966796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 363.0322265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 364.16107177734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 366.7838439941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 365.7252197265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 361.87200927734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 365.2872314453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 362.8263854980469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 367.74755859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 362.3870544433594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 364.3294677734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 367.4504699707031
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 0 21.0 1210.80462626 (16.11465619633363, 9)
loss 368.3222961425781
Current State,action,reward,Response time,Next State:  (9, 16.11465619633363) 3 20.0 1266.64026605 (16.147078378791146, 10)
loss 362.8542175292969
Action +2 not possible so Scaled up by 1
Current State,action,reward,Response time,Next State:  (10, 16.147078378791146) 4 19.0 1234.16752106 (16.229253414601111, 11)
loss 368.9697265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 366.5921936035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 365.083251953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 366.4788513183594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 369.1228332519531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 368.2064208984375
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 2 19.0 1269.21706044 (17.215992726625572, 11)
loss 363.21807861328125
############ Running episode number: 175  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 369.1452941894531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 366.3112487792969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 362.1375732421875
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 4 19.0 982.049698353 (11.469111876584304, 11)
loss 365.98529052734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 366.2118225097656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 370.3751220703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 361.3757629394531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 361.594482421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 366.95025634765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 375.0243835449219
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 1 20.0 939.691239608 (10.819208572963639, 10)
loss 364.3716735839844
Current State,action,reward,Response time,Next State:  (10, 10.819208572963639) 3 19.0 951.555504911 (10.768325938188134, 11)
loss 366.9042663574219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 367.1746826171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 365.98779296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 363.4608459472656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 369.18902587890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 364.18304443359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 366.2159423828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 367.97381591796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 364.7652587890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 373.5897216796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 371.38726806640625
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 1 20.0 916.124940439 (10.42733414151318, 10)
loss 369.1903381347656
Current State,action,reward,Response time,Next State:  (10, 10.42733414151318) 0 22.0 930.768881517 (10.388469398680568, 8)
loss 366.3465576171875
Current State,action,reward,Response time,Next State:  (8, 10.388469398680568) 3 21.0 978.19544437 (10.344006106602812, 9)
loss 366.1524353027344
Current State,action,reward,Response time,Next State:  (9, 10.344006106602812) 3 20.0 964.575212011 (10.319026962956018, 10)
loss 363.4369201660156
Current State,action,reward,Response time,Next State:  (10, 10.319026962956018) 3 19.0 925.023825574 (10.30224719189987, 11)
loss 365.039794921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 366.19744873046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 364.0420227050781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 367.06683349609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 366.3041076660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 369.839599609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 366.6796569824219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 366.6008605957031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 364.6268310546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 367.3034362792969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 365.6024169921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 364.9686584472656
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 1 20.0 914.152581784 (10.390165524255663, 10)
loss 363.6991271972656
Current State,action,reward,Response time,Next State:  (10, 10.390165524255663) 3 19.0 928.797305964 (10.425974763084863, 11)
loss 366.6170349121094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 367.1553649902344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 366.6971130371094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 367.0334167480469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 366.5881042480469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 369.5330505371094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 361.1169128417969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 366.9997253417969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 364.00128173828125
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 4 19.0 984.787563682 (11.819721938468785, 11)
loss 368.5702819824219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 361.3744812011719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 362.732177734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 368.48724365234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 365.8777160644531
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 4 19.0 1089.37925646 (14.283719188889453, 11)
loss 365.8678283691406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 366.844970703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 366.3551940917969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 363.1365966796875
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 0 21.0 1204.9600972 (16.466876895473597, 9)
loss 364.8877258300781
Current State,action,reward,Response time,Next State:  (9, 16.466876895473597) 3 20.0 1285.07728144 (16.871606159345866, 10)
loss 368.5386657714844
Current State,action,reward,Response time,Next State:  (10, 16.871606159345866) 3 19.0 1272.5994393 (17.534967586021782, 11)
loss 366.603271484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 362.8663024902344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 365.1339416503906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 363.1843566894531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 365.4306945800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 367.1372375488281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 373.524169921875
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 2 19.0 1387.23260634 (19.340464848017284, 11)
loss 364.42242431640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 365.1209411621094
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 0 21.0 1383.38281107 (19.140765783401285, 9)
loss 368.7317810058594
Current State,action,reward,Response time,Next State:  (9, 19.140765783401285) 3 20.0 1425.04216908 (19.385636054792762, 10)
loss 365.9291076660156
Current State,action,reward,Response time,Next State:  (10, 19.385636054792762) 3 19.0 1405.95387237 (19.223969507401588, 11)
loss 365.3490295410156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 362.8077697753906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 367.1278381347656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 363.15869140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 364.4911193847656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 361.5800476074219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 362.4676818847656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 362.5694580078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 366.4822692871094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 366.2407531738281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 362.9473876953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 365.08209228515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 363.08111572265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 363.7328186035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 360.6536560058594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 364.9711608886719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 363.3859558105469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 362.8824462890625
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 4 19.0 1211.18755114 (16.004586266677634, 11)
loss 365.81103515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 362.6804504394531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 367.4463195800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 362.9841613769531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 363.53289794921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 366.67413330078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 368.0600891113281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 363.53094482421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 362.55322265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 366.5360412597656
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 2 19.0 1258.27117243 (17.052961248403161, 11)
loss 361.0915222167969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 367.9915466308594
############ Running episode number: 176  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 362.60113525390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 362.3345031738281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 363.10986328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 366.861083984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 368.57501220703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 368.3604431152344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 364.9200134277344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 366.79022216796875
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 1 20.0 949.137050055 (10.931193889570471, 10)
loss 367.9739990234375
Current State,action,reward,Response time,Next State:  (10, 10.931193889570471) 3 19.0 957.495664348 (10.816918347608043, 11)
loss 361.6353454589844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 363.21240234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 367.5655822753906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 367.1796569824219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 367.1082458496094
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 4 19.0 930.602776506 (10.58735855349979, 11)
loss 365.5516357421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 364.6432189941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 364.9090270996094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 364.9838562011719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 364.7050476074219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 365.7046813964844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 369.1944885253906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 367.4021911621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 366.8319396972656
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 4 19.0 919.104778396 (10.388469398680568, 11)
loss 365.258544921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 367.1444091796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 361.8124694824219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 366.22900390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 372.7232666015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 364.3221435546875
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 1 20.0 910.69972028 (10.335411397720526, 10)
loss 367.17742919921875
Current State,action,reward,Response time,Next State:  (10, 10.335411397720526) 3 19.0 925.892923039 (10.305649118067803, 11)
loss 364.2000732421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 368.1159362792969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 362.3332824707031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 370.6719665527344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 366.02667236328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 366.15087890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 364.9563293457031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 364.84820556640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 365.3186340332031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 362.71368408203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 365.400146484375
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 0 21.0 925.376677007 (10.655373370049301, 9)
loss 361.3465881347656
Current State,action,reward,Response time,Next State:  (9, 10.655373370049301) 3 20.0 980.873751654 (10.624473674922116, 10)
loss 367.3354187011719
Current State,action,reward,Response time,Next State:  (10, 10.624473674922116) 3 19.0 941.225969064 (10.771376986314287, 11)
loss 365.5095520019531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 365.65875244140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 366.55712890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 364.9231262207031
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 0 21.0 963.716106332 (11.670334358779868, 9)
loss 362.552001953125
Current State,action,reward,Response time,Next State:  (9, 11.670334358779868) 3 20.0 1034.00195058 (11.819721938468785, 10)
loss 366.9028625488281
Current State,action,reward,Response time,Next State:  (10, 11.819721938468785) 3 19.0 1004.62682792 (12.19918626616789, 11)
loss 365.0548400878906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 367.0071105957031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 366.5212097167969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 363.7228088378906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 363.43890380859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 365.5361328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 365.29638671875
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 4 19.0 1179.43847566 (15.836943704090487, 11)
loss 365.3163757324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 363.742431640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 364.8621826171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 363.93487548828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 362.4208984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 362.7524719238281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 364.3084411621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 363.0909729003906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 363.36126708984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 364.209228515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 366.4354553222656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 366.2547607421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 368.7584228515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 365.1214294433594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 365.2281799316406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 364.78369140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 366.0489501953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 360.5088195800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 361.2643737792969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 368.68511962890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 363.8084716796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 361.56768798828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 368.189453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 364.85455322265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 362.9251708984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 365.2082214355469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 362.4005432128906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 360.8140563964844
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 0 21.0 1200.39231205 (15.817158911312735, 9)
loss 367.8060607910156
Current State,action,reward,Response time,Next State:  (9, 15.817158911312735) 3 20.0 1251.06775133 (15.829956988360925, 10)
loss 367.77130126953125
Current State,action,reward,Response time,Next State:  (10, 15.829956988360925) 3 19.0 1217.34610485 (15.892373986997768, 11)
loss 367.85858154296875
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 2 19.0 1207.88915169 (15.954793861767499, 11)
loss 369.2445068359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 364.4130859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 362.91156005859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 361.03045654296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 362.0107421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 365.26458740234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 369.23095703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 365.7126159667969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 364.91717529296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 364.0056457519531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 367.713623046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 365.38519287109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 362.0111389160156
############ Running episode number: 177  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 364.6878662109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 365.3345031738281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 364.38653564453125
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 0 21.0 982.049698353 (11.469111876584304, 9)
loss 365.8880310058594
Current State,action,reward,Response time,Next State:  (9, 11.469111876584304) 3 20.0 1023.46894667 (11.336751742492702, 10)
loss 364.4759216308594
Current State,action,reward,Response time,Next State:  (10, 11.336751742492702) 3 19.0 979.00811241 (11.25610796929319, 11)
loss 365.07421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 366.5357971191406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 368.6282653808594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 366.4992370605469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 363.2939453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 362.3616943359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 366.949462890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 365.5848693847656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 364.32080078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 363.8996276855469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 360.4360656738281
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 0 21.0 925.738299342 (10.553846649940214, 9)
loss 364.09375
Current State,action,reward,Response time,Next State:  (9, 10.553846649940214) 3 20.0 975.559328891 (10.489125480251131, 10)
loss 362.1922607421875
Current State,action,reward,Response time,Next State:  (10, 10.489125480251131) 3 19.0 934.046546974 (10.448897752470936, 11)
loss 365.8011169433594
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 2 19.0 920.244245637 (10.433149880183072, 11)
loss 372.0794982910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 366.528564453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 361.7008056640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 368.076416015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 364.9496154785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 361.92218017578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 366.19073486328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 365.6978454589844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 361.85601806640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 367.5316467285156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 364.21624755859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 363.81719970703125
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 2 19.0 912.67468196 (10.24826025489064, 11)
loss 362.5080871582031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 368.3121337890625
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 0 21.0 911.133954163 (10.236991269871366, 9)
loss 363.8253173828125
Current State,action,reward,Response time,Next State:  (9, 10.236991269871366) 3 20.0 958.973513426 (10.236272697871373, 10)
loss 367.38311767578125
Current State,action,reward,Response time,Next State:  (10, 10.236272697871373) 3 19.0 920.634200723 (10.369891240151098, 11)
loss 364.4402160644531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 368.87738037109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 366.5497741699219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 364.32330322265625
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 0 21.0 917.140709305 (10.425974763084863, 9)
loss 363.87786865234375
Current State,action,reward,Response time,Next State:  (9, 10.425974763084863) 3 20.0 968.865866662 (10.546025383098053, 10)
loss 366.3384704589844
Current State,action,reward,Response time,Next State:  (10, 10.546025383098053) 3 19.0 937.064750655 (10.655373370049301, 11)
loss 367.10467529296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 362.92431640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 366.40997314453125
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 0 21.0 937.284736847 (10.924797168745895, 9)
loss 366.92242431640625
Current State,action,reward,Response time,Next State:  (9, 10.924797168745895) 3 20.0 994.97675791 (11.039747673816453, 10)
loss 364.159423828125
Current State,action,reward,Response time,Next State:  (10, 11.039747673816453) 3 19.0 963.253801267 (11.271571944085663, 11)
loss 361.6496887207031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 364.2910461425781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 365.9208984375
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 2 19.0 992.681522335 (12.19918626616789, 11)
loss 363.2635498046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 362.8816223144531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 367.3955993652344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 366.1234436035156
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 1 20.0 1089.37925646 (14.283719188889453, 10)
loss 363.5082702636719
Current State,action,reward,Response time,Next State:  (10, 14.283719188889453) 3 19.0 1135.32732476 (14.677479537099185, 11)
loss 367.7185363769531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 362.9185485839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 368.5079040527344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 363.19500732421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 364.8034973144531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 366.5249328613281
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 0 21.0 1294.6873044 (17.669285735563751, 9)
loss 369.28350830078125
Current State,action,reward,Response time,Next State:  (9, 17.669285735563751) 3 20.0 1348.01745033 (17.944480812078613, 10)
loss 364.6450500488281
Current State,action,reward,Response time,Next State:  (10, 17.944480812078613) 3 19.0 1329.50910109 (18.385807405229915, 11)
loss 371.3662414550781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 365.5118713378906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 361.9764709472656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 363.08831787109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 365.8499755859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 363.870849609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 365.4933166503906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 363.9194641113281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 366.1211242675781
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 2 19.0 1383.93777195 (19.25591252280865, 11)
loss 368.5478515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 362.8164978027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 368.4101257324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 363.8998107910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 364.52716064453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 362.3316345214844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 362.8805847167969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 367.2705383300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 363.86090087890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 362.0998840332031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 367.5149841308594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 364.8172607421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 364.5924377441406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 368.3768005371094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 365.87054443359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 364.4417419433594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 364.4115295410156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 364.04559326171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 362.76434326171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 366.4015197753906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 363.7855529785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 363.1555480957031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 365.0144348144531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 362.4078369140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 363.9556884765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 364.3759765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 364.6807861328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 366.209716796875
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 1 20.0 1269.21706044 (17.215992726625572, 10)
loss 361.061279296875
############ Running episode number: 178  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 366.40313720703125
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 0 21.0 1000.80824137 (11.786394321941378, 9)
loss 365.623291015625
Current State,action,reward,Response time,Next State:  (9, 11.786394321941378) 3 20.0 1040.0771169 (11.61852219546234, 10)
loss 366.31170654296875
Current State,action,reward,Response time,Next State:  (10, 11.61852219546234) 0 22.0 993.95437024 (11.469111876584304, 8)
loss 361.8952941894531
Current State,action,reward,Response time,Next State:  (8, 11.469111876584304) 3 21.0 1041.35337246 (11.336751742492702, 9)
loss 365.268310546875
Current State,action,reward,Response time,Next State:  (9, 11.336751742492702) 3 20.0 1016.54054685 (11.25610796929319, 10)
loss 367.010009765625
Current State,action,reward,Response time,Next State:  (10, 11.25610796929319) 3 19.0 974.730436685 (11.027107764209074, 11)
loss 365.05816650390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 366.7819519042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 365.8751525878906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 361.96380615234375
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 2 19.0 939.691239608 (10.819208572963639, 11)
loss 367.1740417480469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 363.23577880859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 365.3115539550781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 366.2968444824219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 364.909912109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 367.8647766113281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 365.0081787109375
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 1 20.0 925.789969445 (10.489125480251131, 10)
loss 364.1551208496094
Action +2 not possible so Scaled up by 1
Current State,action,reward,Response time,Next State:  (10, 10.489125480251131) 4 19.0 934.046546974 (10.448897752470936, 11)
loss 368.8467712402344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 364.7564697265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 361.4146423339844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 364.6552429199219
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 2 19.0 916.124940439 (10.42733414151318, 11)
loss 362.158935546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 364.49468994140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 363.9075012207031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 363.5963134765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 364.7236022949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 362.58367919921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 364.8587951660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 362.083740234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 362.6616516113281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 365.2237854003906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 368.0954895019531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 367.9282531738281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 368.3256530761719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 366.9442443847656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 365.9872131347656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 364.3934020996094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 365.5555725097656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 368.0791320800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 364.59210205078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 362.9843444824219
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 1 20.0 931.154858096 (10.624473674922116, 10)
loss 363.66192626953125
Current State,action,reward,Response time,Next State:  (10, 10.624473674922116) 3 19.0 941.225969064 (10.771376986314287, 11)
loss 366.69696044921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 369.2602844238281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 363.9085998535156
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 2 19.0 951.466016946 (11.271571944085663, 11)
loss 366.9474182128906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 362.72296142578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 364.3681335449219
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 2 19.0 992.681522335 (12.19918626616789, 11)
loss 367.6471862792969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 363.30877685546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 362.5438537597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 362.5496520996094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 363.56201171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 366.0678405761719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 363.5033264160156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 363.8166198730469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 364.53802490234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 367.176025390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 366.7252502441406
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 4 19.0 1294.6873044 (17.669285735563751, 11)
loss 367.2025451660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 363.58306884765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 364.2081604003906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 366.6023864746094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 370.0617980957031
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 4 19.0 1373.60319427 (19.286321916040979, 11)
loss 365.586669921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 364.88140869140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 362.472900390625
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 2 19.0 1383.38281107 (19.140765783401285, 11)
loss 362.2907409667969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 364.0489196777344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 362.67401123046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 366.3562927246094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 364.8002624511719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 366.3189697265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 363.8596496582031
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 4 19.0 1339.12370397 (17.82724819986867, 11)
loss 360.80206298828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 363.4844665527344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 364.9544982910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 360.9319763183594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 364.4103088378906
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 4 19.0 1210.97093797 (15.828704162850809, 11)
loss 365.3294677734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 360.5171813964844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 363.57421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 369.2262268066406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 361.2862548828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 363.4715881347656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 365.5865783691406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 371.4077453613281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 364.4926452636719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 365.7358703613281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 365.2170104980469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 364.1888732910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 365.391357421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 367.744873046875
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 0 21.0 1225.69057988 (16.295120821876548, 9)
loss 364.7515563964844
Current State,action,reward,Response time,Next State:  (9, 16.295120821876548) 3 20.0 1276.0866986 (16.667936385136993, 10)
loss 363.78118896484375
Current State,action,reward,Response time,Next State:  (10, 16.667936385136993) 3 19.0 1261.79596106 (16.836383524612351, 11)
loss 363.14910888671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 366.194091796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 366.6899108886719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 362.2239990234375
############ Running episode number: 179  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 364.172607421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 363.28326416015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 363.5064392089844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 365.1259460449219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 360.7869873046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 368.44525146484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 365.6855773925781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 364.2310485839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 367.4150390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 365.4131164550781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 366.3675842285156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 364.6243591308594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 364.0367431640625
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 2 19.0 937.318160694 (10.644925616761762, 11)
loss 362.546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 363.8033447265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 366.06689453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 365.91070556640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 364.1474304199219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 366.7403259277344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 363.96221923828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 364.8351745605469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 362.5035095214844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 366.4807434082031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 364.226318359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 362.6120910644531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 366.7063293457031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 363.87701416015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 365.12921142578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 364.3101501464844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 363.12384033203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 366.7136535644531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 365.66748046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 364.3647155761719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 365.2476806640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 364.0251159667969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 364.48736572265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 361.725341796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 364.4203186035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 365.4018859863281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 364.7769470214844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 363.0280456542969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 363.7685241699219
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 0 21.0 931.154858096 (10.624473674922116, 9)
loss 366.802734375
Current State,action,reward,Response time,Next State:  (9, 10.624473674922116) 3 20.0 979.256305105 (10.771376986314287, 10)
loss 365.0723876953125
Current State,action,reward,Response time,Next State:  (10, 10.771376986314287) 3 19.0 949.018321829 (10.924797168745895, 11)
loss 364.9212341308594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 362.1274108886719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 366.458740234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 363.4207458496094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 364.86260986328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 366.4902648925781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 362.17437744140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 363.16082763671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 361.30206298828125
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 4 19.0 1089.37925646 (14.283719188889453, 11)
loss 363.1794128417969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 363.3757629394531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 364.0392761230469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 366.8282470703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 362.03387451171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 360.7939453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 362.9648742675781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 364.3033447265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 362.2214050292969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 363.3450622558594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 365.06201171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 366.3528747558594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 362.9931640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 366.2514343261719
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 1 20.0 1390.09363446 (19.213467265587269, 10)
loss 362.1557312011719
Current State,action,reward,Response time,Next State:  (10, 19.213467265587269) 3 19.0 1396.82133527 (19.140765783401285, 11)
loss 365.39532470703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 361.8066711425781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 362.3246154785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 365.3907470703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 360.9109191894531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 362.0381774902344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 364.14056396484375
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 2 19.0 1339.12370397 (17.82724819986867, 11)
loss 365.0197448730469
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 1 20.0 1310.13203606 (17.229782241685768, 10)
loss 363.0282287597656
Current State,action,reward,Response time,Next State:  (10, 17.229782241685768) 3 19.0 1291.59856437 (16.84211602880065, 11)
loss 362.5012512207031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 362.23370361328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 363.6100769042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 361.85308837890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 362.2581787109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 363.3672180175781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 364.6424560546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 363.5072021484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 364.94287109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 361.7218933105469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 362.2691345214844
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 0 21.0 1211.18755114 (16.004586266677634, 9)
loss 365.0406188964844
Current State,action,reward,Response time,Next State:  (9, 16.004586266677634) 2 21.0 1260.87864843 (16.017694914042416, 9)
loss 364.5998840332031
Current State,action,reward,Response time,Next State:  (9, 16.017694914042416) 3 20.0 1261.56482143 (15.947547279389703, 10)
loss 364.0047912597656
Current State,action,reward,Response time,Next State:  (10, 15.947547279389703) 3 19.0 1223.58357506 (16.11465619633363, 11)
loss 365.8133850097656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 364.8682556152344
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 1 20.0 1221.34827555 (16.229253414601111, 10)
loss 364.03692626953125
Current State,action,reward,Response time,Next State:  (10, 16.229253414601111) 3 19.0 1238.52642122 (16.295120821876548, 11)
loss 364.4184265136719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 362.9948425292969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 365.0910949707031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 361.8722229003906
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 2 19.0 1258.27117243 (17.052961248403161, 11)
loss 365.0887756347656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 363.6804504394531
############ Running episode number: 180  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 366.5029602050781
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 2 19.0 1000.80824137 (11.786394321941378, 11)
loss 366.62799072265625
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 1 20.0 990.920419923 (11.61852219546234, 10)
loss 364.008056640625
Current State,action,reward,Response time,Next State:  (10, 11.61852219546234) 3 19.0 993.95437024 (11.469111876584304, 11)
loss 363.9432067871094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 359.63525390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 361.62164306640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 364.8310852050781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 366.5564880371094
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 4 19.0 949.137050055 (10.931193889570471, 11)
loss 362.145263671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 364.6198425292969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 363.25677490234375
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 2 19.0 939.812260006 (10.768325938188134, 11)
loss 363.77703857421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 362.1121520996094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 365.5899658203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 360.53857421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 363.0910949707031
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 4 19.0 925.738299342 (10.553846649940214, 11)
loss 364.6719970703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 365.614990234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 362.13604736328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 362.9909362792969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 363.95684814453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 364.2823486328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 364.9208068847656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 363.1999816894531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 367.6381530761719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 363.0295104980469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 362.87158203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 361.8771057128906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 364.4947814941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 365.5848388671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 362.9855651855469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 362.3315124511719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 362.3553161621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 363.458740234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 365.8457946777344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 365.1346130371094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 362.03607177734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 364.00787353515625
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 0 21.0 914.152581784 (10.390165524255663, 9)
loss 361.0220642089844
Current State,action,reward,Response time,Next State:  (9, 10.390165524255663) 3 20.0 966.991429728 (10.425974763084863, 10)
loss 365.1761474609375
Current State,action,reward,Response time,Next State:  (10, 10.425974763084863) 3 19.0 930.696774523 (10.546025383098053, 11)
loss 365.2147521972656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 365.600341796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 363.6214294433594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 364.8603210449219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 364.22674560546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 363.3128356933594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 368.92230224609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 366.1439514160156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 361.94293212890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 364.95263671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 364.61358642578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 364.4448547363281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 361.8472595214844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 363.35723876953125
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 2 19.0 1122.88439768 (14.677479537099185, 11)
loss 363.8959045410156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 363.8250427246094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 364.99652099609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 361.539306640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 363.4198303222656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 362.45172119140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 366.8340759277344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 362.1849365234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 363.8953552246094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 362.41265869140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 361.5667724609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 361.0843200683594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 365.9388122558594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 362.7630920410156
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 2 19.0 1383.38281107 (19.140765783401285, 11)
loss 364.6305236816406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 364.002197265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 364.5747985839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 364.9347229003906
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 2 19.0 1385.62570908 (19.08360399753829, 11)
loss 363.8215026855469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 368.2271423339844
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 0 21.0 1354.56874896 (18.375894992990247, 9)
loss 367.4021911621094
Current State,action,reward,Response time,Next State:  (9, 18.375894992990247) 0 23.0 1385.00495784 (17.82724819986867, 7)
loss 364.11859130859375
Current State,action,reward,Response time,Next State:  (7, 17.82724819986867) 3 22.0 1486.76498054 (17.229782241685768, 8)
loss 362.84600830078125
Current State,action,reward,Response time,Next State:  (8, 17.229782241685768) 3 21.0 1378.03457101 (16.84211602880065, 9)
loss 363.3723449707031
Current State,action,reward,Response time,Next State:  (9, 16.84211602880065) 3 20.0 1304.71919827 (16.237094554670044, 10)
loss 363.6006164550781
Current State,action,reward,Response time,Next State:  (10, 16.237094554670044) 3 19.0 1238.94234737 (15.950694610794756, 11)
loss 366.24346923828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 363.3457336425781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 361.19610595703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 366.330322265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 362.733642578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 364.6673583984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 364.462646484375
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 0 21.0 1204.59090422 (15.892373986997768, 9)
loss 363.3731689453125
Current State,action,reward,Response time,Next State:  (9, 15.892373986997768) 3 20.0 1255.00488935 (15.954793861767499, 10)
loss 363.5854797363281
Current State,action,reward,Response time,Next State:  (10, 15.954793861767499) 3 19.0 1223.96796344 (16.004586266677634, 11)
loss 363.9214172363281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 364.8674011230469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 360.916748046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 361.7575378417969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 367.4667663574219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 362.9452209472656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 363.2458190917969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 361.24200439453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 365.1257019042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 361.3654479980469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 361.5093078613281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 364.3304138183594
############ Running episode number: 181  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 365.27447509765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 362.8311767578125
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 0 21.0 990.920419923 (11.61852219546234, 9)
loss 362.8394775390625
Current State,action,reward,Response time,Next State:  (9, 11.61852219546234) 3 20.0 1031.28983953 (11.469111876584304, 10)
loss 363.1103515625
Action +2 not possible so Scaled up by 1
Current State,action,reward,Response time,Next State:  (10, 11.469111876584304) 4 19.0 986.02903554 (11.336751742492702, 11)
loss 365.1253967285156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 365.3092346191406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 365.5253601074219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 361.2648620605469
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 1 20.0 949.137050055 (10.931193889570471, 10)
loss 363.1304626464844
Current State,action,reward,Response time,Next State:  (10, 10.931193889570471) 3 19.0 957.495664348 (10.816918347608043, 11)
loss 364.63671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 364.81536865234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 365.1076354980469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 367.3015441894531
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 1 20.0 937.318160694 (10.644925616761762, 10)
loss 363.2088317871094
Current State,action,reward,Response time,Next State:  (10, 10.644925616761762) 3 19.0 942.310823749 (10.58735855349979, 11)
loss 361.8001403808594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 363.81170654296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 364.511474609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 365.0467834472656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 363.378662109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 362.3150939941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 366.19342041015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 362.1747131347656
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 4 19.0 916.124940439 (10.42733414151318, 11)
loss 364.13836669921875
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 2 19.0 919.104778396 (10.388469398680568, 11)
loss 363.246337890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 366.0496826171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 363.29168701171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 363.1932373046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 362.85736083984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 366.7637023925781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 363.3134460449219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 368.93402099609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 363.70196533203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 363.0921630859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 364.76007080078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 363.3825378417969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 367.4787902832031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 360.8905334472656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 363.6463623046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 367.93804931640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 362.55419921875
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 0 21.0 919.032945938 (10.546025383098053, 9)
loss 365.4427795410156
Current State,action,reward,Response time,Next State:  (9, 10.546025383098053) 3 20.0 975.14992417 (10.655373370049301, 10)
loss 368.1966247558594
Current State,action,reward,Response time,Next State:  (10, 10.655373370049301) 3 19.0 942.865015335 (10.624473674922116, 11)
loss 365.1734313964844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 365.0025939941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 366.4579162597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 365.6283264160156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 363.3572082519531
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 2 19.0 963.716106332 (11.670334358779868, 11)
loss 363.0504455566406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 364.2516784667969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 363.9608154296875
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 0 21.0 1012.73322757 (12.501496275411796, 9)
loss 362.56494140625
Current State,action,reward,Response time,Next State:  (9, 12.501496275411796) 2 21.0 1077.50917513 (13.168618569876575, 9)
loss 363.1926574707031
Current State,action,reward,Response time,Next State:  (9, 13.168618569876575) 4 19.0 1112.429735 (13.649658108197247, 11)
loss 365.83489990234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 371.642333984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 365.5986022949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 363.2458190917969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 361.40869140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 365.4186096191406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 368.3339538574219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 363.5596008300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 361.51019287109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 362.5246276855469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 362.43609619140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 364.27545166015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 365.90899658203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 367.2756652832031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 364.1473083496094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 364.9330749511719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 363.73114013671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 363.2637634277344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 363.0088806152344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 363.0608215332031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 364.9356384277344
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 2 19.0 1376.52055872 (18.668181536495972, 11)
loss 365.5788879394531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 365.1700134277344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 362.8779602050781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 363.27252197265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 366.3876037597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 361.1500244140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 364.2757263183594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 363.34228515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 364.4617614746094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 364.753173828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 364.6608581542969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 366.69183349609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 364.0622253417969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 364.6732482910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 366.1221923828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 362.05084228515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 365.8375244140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 367.7589111328125
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 0 21.0 1210.80462626 (16.11465619633363, 9)
loss 366.3820495605469
Current State,action,reward,Response time,Next State:  (9, 16.11465619633363) 3 20.0 1266.64026605 (16.147078378791146, 10)
loss 363.5301208496094
Current State,action,reward,Response time,Next State:  (10, 16.147078378791146) 3 19.0 1234.16752106 (16.229253414601111, 11)
loss 360.8388977050781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 366.7012634277344
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 2 19.0 1229.17115431 (16.667936385136993, 11)
loss 369.7100524902344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 365.68829345703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 366.2689208984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 363.72430419921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 361.189453125
############ Running episode number: 182  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 362.7688293457031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 366.6719665527344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 366.9994812011719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 364.0936584472656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 365.1966857910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 366.2000732421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 366.9435729980469
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 1 20.0 950.798097136 (10.995673623987257, 10)
loss 361.0165100097656
Current State,action,reward,Response time,Next State:  (10, 10.995673623987257) 3 19.0 960.915933313 (10.931193889570471, 11)
loss 365.30657958984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 368.61859130859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 360.5095520019531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 363.804931640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 365.04876708984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 362.4908142089844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 363.8625183105469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 368.04974365234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 367.0475158691406
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 0 21.0 925.789969445 (10.489125480251131, 9)
loss 363.0110168457031
Current State,action,reward,Response time,Next State:  (9, 10.489125480251131) 4 19.0 972.171495057 (10.448897752470936, 11)
loss 364.795166015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 364.5564270019531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 363.684326171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 365.3971862792969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 365.6631164550781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 367.4205322265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 362.8872375488281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 363.6554870605469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 365.4686584472656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 361.5998840332031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 362.9785461425781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 366.1173095703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 365.59112548828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 364.0928649902344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 364.2460021972656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 365.5713806152344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 363.67730712890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 362.0592346191406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 365.6025085449219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 364.98388671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 365.2411804199219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 362.49853515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 363.28289794921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 363.0965576171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 364.69537353515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 363.1416320800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 365.1596984863281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 364.1795654296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 367.2110290527344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 362.9056701660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 365.0849914550781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 361.2227783203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 365.26324462890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 365.9185791015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 363.913818359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 364.8590393066406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 362.78466796875
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 4 19.0 1143.69153516 (15.353965082180355, 11)
loss 364.8379821777344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 368.5464172363281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 363.5087585449219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 363.7347412109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 367.46209716796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 365.5772705078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 366.3216857910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 361.7212219238281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 363.99420166015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 368.06622314453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 362.9127502441406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 362.9056701660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 365.51068115234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 363.82232666015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 365.3882751464844
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 0 21.0 1392.48057747 (19.223969507401588, 9)
loss 365.3910827636719
Current State,action,reward,Response time,Next State:  (9, 19.223969507401588) 1 22.0 1429.39747342 (19.25591252280865, 8)
loss 365.63714599609375
Current State,action,reward,Response time,Next State:  (8, 19.25591252280865) 3 21.0 1496.45133993 (19.08360399753829, 9)
loss 362.55596923828125
Current State,action,reward,Response time,Next State:  (9, 19.08360399753829) 3 20.0 1422.05003169 (18.668181536495972, 10)
loss 370.919921875
Current State,action,reward,Response time,Next State:  (10, 18.668181536495972) 3 19.0 1367.89714889 (18.375894992990247, 11)
loss 363.65960693359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 364.0834655761719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 363.3551940917969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 368.9230651855469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 364.4058837890625
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 4 19.0 1226.10492247 (15.950694610794756, 11)
loss 365.21734619140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 366.63787841796875
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 0 21.0 1204.52470225 (15.550833128512703, 9)
loss 363.4369201660156
Current State,action,reward,Response time,Next State:  (9, 15.550833128512703) 3 20.0 1237.12691092 (15.446694946204717, 10)
loss 367.4075927734375
Current State,action,reward,Response time,Next State:  (10, 15.446694946204717) 3 19.0 1197.01631782 (15.750501603468638, 11)
loss 362.6490478515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 368.45458984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 361.77313232421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 362.149658203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 363.9791259765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 363.8498229980469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 367.6431579589844
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 2 19.0 1214.51137704 (15.947547279389703, 11)
loss 365.15838623046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 369.2929382324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 367.30450439453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 364.9256591796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 359.69183349609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 365.974609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 361.7166442871094
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 4 19.0 1257.77263112 (16.845818065953559, 11)
loss 359.8080139160156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 365.46710205078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 363.5464782714844
############ Running episode number: 183  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 366.8509216308594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 365.6017150878906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 365.86578369140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 362.20556640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 368.25
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 364.8309020996094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 360.0616760253906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 365.3639221191406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 365.9949951171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 362.8583068847656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 365.5350646972656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 364.3354187011719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 365.4032897949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 363.0516052246094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 365.1351013183594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 365.4649963378906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 365.2569885253906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 368.6996154785156
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 2 19.0 922.369964659 (10.448897752470936, 11)
loss 362.77838134765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 366.61993408203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 364.77130126953125
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 4 19.0 919.871906942 (10.370942817486826, 11)
loss 363.09173583984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 366.47955322265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 367.00897216796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 363.03485107421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 365.1991882324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 362.89715576171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 364.82891845703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 367.6580505371094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 364.0334777832031
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 2 19.0 914.247384359 (10.305649118067803, 11)
loss 362.78765869140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 366.46905517578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 362.03729248046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 363.3455505371094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 365.8239440917969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 363.2980041503906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 363.31512451171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 365.3437194824219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 363.35748291015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 364.6329040527344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 364.8290100097656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 362.9280090332031
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 4 19.0 931.154858096 (10.624473674922116, 11)
loss 363.0309143066406
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 2 19.0 929.522052234 (10.771376986314287, 11)
loss 361.7696838378906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 364.8574523925781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 367.885986328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 367.16534423828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 363.8197326660156
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 4 19.0 984.787563682 (11.819721938468785, 11)
loss 365.0238342285156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 362.8209228515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 363.65411376953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 366.85296630859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 364.3521423339844
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 1 20.0 1089.37925646 (14.283719188889453, 10)
loss 365.09930419921875
Current State,action,reward,Response time,Next State:  (10, 14.283719188889453) 3 19.0 1135.32732476 (14.677479537099185, 11)
loss 366.57379150390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 364.5065002441406
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 0 21.0 1179.43847566 (15.836943704090487, 9)
loss 364.9953308105469
Current State,action,reward,Response time,Next State:  (9, 15.836943704090487) 3 20.0 1252.10338759 (16.466876895473597, 10)
loss 365.58758544921875
Current State,action,reward,Response time,Next State:  (10, 16.466876895473597) 3 19.0 1251.130943 (16.871606159345866, 11)
loss 364.0732421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 369.1659240722656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 365.8857727050781
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 4 19.0 1301.78496219 (17.944480812078613, 11)
loss 365.2923278808594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 365.5040283203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 365.3736572265625
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 4 19.0 1354.73183582 (19.02839494033929, 11)
loss 366.4195251464844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 368.71722412109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 365.1958923339844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 363.75347900390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 364.3756103515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 363.0341491699219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 364.7168273925781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 364.54595947265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 364.42034912109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 366.358642578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 363.08209228515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 363.6767272949219
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 1 20.0 1310.13203606 (17.229782241685768, 10)
loss 364.07470703125
Current State,action,reward,Response time,Next State:  (10, 17.229782241685768) 3 19.0 1291.59856437 (16.84211602880065, 11)
loss 362.6385192871094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 362.6408386230469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 365.2219543457031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 364.36956787109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 365.1195983886719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 363.5052185058594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 365.56951904296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 367.91455078125
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 2 19.0 1203.91462651 (15.829956988360925, 11)
loss 362.6136474609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 363.37921142578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 363.1039733886719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 364.9696044921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 366.1808776855469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 362.65301513671875
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 1 20.0 1210.80462626 (16.11465619633363, 10)
loss 363.89080810546875
Current State,action,reward,Response time,Next State:  (10, 16.11465619633363) 3 19.0 1232.44771583 (16.147078378791146, 11)
loss 369.79901123046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 364.3059387207031
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 4 19.0 1225.69057988 (16.295120821876548, 11)
loss 364.152099609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 365.1597595214844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 365.5318603515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 364.1612243652344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 362.1791076660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 367.0946044921875
############ Running episode number: 184  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 0 22.0 1029.06659875 (11.973514343585284, 8)
loss 360.9241943359375
Current State,action,reward,Response time,Next State:  (8, 11.973514343585284) 3 21.0 1070.83307124 (11.786394321941378, 9)
loss 365.9946594238281
Current State,action,reward,Response time,Next State:  (9, 11.786394321941378) 3 20.0 1040.0771169 (11.61852219546234, 10)
loss 363.9857177734375
Current State,action,reward,Response time,Next State:  (10, 11.61852219546234) 3 19.0 993.95437024 (11.469111876584304, 11)
loss 369.25982666015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 366.4769592285156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 364.9941711425781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 365.0658874511719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 363.0825500488281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 361.09332275390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 364.5857849121094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 368.511962890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 365.57647705078125
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 1 20.0 937.12351295 (10.772009508959538, 10)
loss 364.2641906738281
Current State,action,reward,Response time,Next State:  (10, 10.772009508959538) 3 19.0 949.051873418 (10.644925616761762, 11)
loss 362.58856201171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 368.9985046386719
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 2 19.0 927.560809977 (10.552868829802469, 11)
loss 364.3244934082031
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 2 19.0 925.738299342 (10.553846649940214, 11)
loss 364.3157958984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 363.253662109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 366.3128967285156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 362.7294616699219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 364.4015197753906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 362.82977294921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 362.8639831542969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 364.2441101074219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 360.7164306640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 364.7078552246094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 364.5945129394531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 362.7909240722656
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 1 20.0 911.223233653 (10.268274366284802, 10)
loss 363.9613952636719
Current State,action,reward,Response time,Next State:  (10, 10.268274366284802) 3 19.0 922.331700166 (10.335411397720526, 11)
loss 365.8449401855469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 366.2143859863281
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 4 19.0 912.67468196 (10.24826025489064, 11)
loss 363.0298156738281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 363.3712158203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 362.15325927734375
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 0 21.0 909.046654676 (10.236272697871373, 9)
loss 368.50762939453125
Current State,action,reward,Response time,Next State:  (9, 10.236272697871373) 3 20.0 958.935899728 (10.369891240151098, 10)
loss 365.4766845703125
Current State,action,reward,Response time,Next State:  (10, 10.369891240151098) 3 19.0 927.721874973 (10.316955310454549, 11)
loss 367.1501159667969
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 0 21.0 913.272125304 (10.333617326102203, 9)
loss 361.8150329589844
Current State,action,reward,Response time,Next State:  (9, 10.333617326102203) 3 20.0 964.03141062 (10.390165524255663, 10)
loss 365.0275573730469
Current State,action,reward,Response time,Next State:  (10, 10.390165524255663) 3 19.0 928.797305964 (10.425974763084863, 11)
loss 365.92950439453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 365.0387268066406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 365.09033203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 365.4801940917969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 365.3681640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 368.0560607910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 365.638916015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 365.7566223144531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 362.76080322265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 361.5718994140625
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 0 21.0 992.681522335 (12.19918626616789, 9)
loss 363.086181640625
Current State,action,reward,Response time,Next State:  (9, 12.19918626616789) 3 20.0 1061.68473805 (12.501496275411796, 10)
loss 363.7447509765625
Current State,action,reward,Response time,Next State:  (10, 12.501496275411796) 3 19.0 1040.79092857 (13.168618569876575, 11)
loss 365.3572998046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 363.0456237792969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 364.01702880859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 365.6251525878906
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 1 20.0 1143.69153516 (15.353965082180355, 10)
loss 363.7113037109375
Current State,action,reward,Response time,Next State:  (10, 15.353965082180355) 3 19.0 1192.09754638 (15.836943704090487, 11)
loss 363.9505615234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 362.9162902832031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 367.9515686035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 363.039306640625
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 2 19.0 1294.6873044 (17.669285735563751, 11)
loss 365.34063720703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 364.99853515625
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 2 19.0 1316.32685758 (18.385807405229915, 11)
loss 365.88726806640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 365.25079345703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 362.0085754394531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 366.4295959472656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 368.37933349609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 361.2195739746094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 364.2627868652344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 367.87139892578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 367.77349853515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 363.80987548828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 365.593505859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 363.5168762207031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 364.7532043457031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 364.82696533203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 367.9713134765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 362.3954772949219
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 4 19.0 1258.07554888 (16.237094554670044, 11)
loss 363.0086975097656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 363.09747314453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 367.4145202636719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 361.6109313964844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 365.38983154296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 364.2497863769531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 362.2669372558594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 364.5289306640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 363.643310546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 364.7406005859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 366.58306884765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 363.08355712890625
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 0 21.0 1214.51137704 (15.947547279389703, 9)
loss 365.447265625
Current State,action,reward,Response time,Next State:  (9, 15.947547279389703) 3 20.0 1257.89293893 (16.11465619633363, 10)
loss 366.1279296875
Current State,action,reward,Response time,Next State:  (10, 16.11465619633363) 3 19.0 1232.44771583 (16.147078378791146, 11)
loss 368.2913818359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 363.9343566894531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 363.7916259765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 364.7900695800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 366.1260986328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 367.1800842285156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 365.561767578125
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 1 20.0 1269.21706044 (17.215992726625572, 10)
loss 363.6825256347656
############ Running episode number: 185  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 365.6808166503906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 365.744384765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 367.400634765625
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 0 21.0 982.049698353 (11.469111876584304, 9)
loss 364.2122497558594
Current State,action,reward,Response time,Next State:  (9, 11.469111876584304) 1 22.0 1023.46894667 (11.336751742492702, 8)
loss 363.80865478515625
Current State,action,reward,Response time,Next State:  (8, 11.336751742492702) 3 21.0 1033.61761156 (11.25610796929319, 9)
loss 366.2118225097656
Current State,action,reward,Response time,Next State:  (9, 11.25610796929319) 3 20.0 1012.3192433 (11.027107764209074, 10)
loss 364.41485595703125
Current State,action,reward,Response time,Next State:  (10, 11.027107764209074) 3 19.0 962.583328739 (10.995673623987257, 11)
loss 361.22900390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 367.13690185546875
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 0 21.0 945.729803224 (10.816918347608043, 9)
loss 366.32464599609375
Current State,action,reward,Response time,Next State:  (9, 10.816918347608043) 3 20.0 989.329834005 (10.819208572963639, 10)
loss 365.8046875
Current State,action,reward,Response time,Next State:  (10, 10.819208572963639) 1 21.0 951.555504911 (10.768325938188134, 9)
loss 360.6512451171875
Current State,action,reward,Response time,Next State:  (9, 10.768325938188134) 3 20.0 986.786261176 (10.772009508959538, 10)
loss 363.26959228515625
Current State,action,reward,Response time,Next State:  (10, 10.772009508959538) 3 19.0 949.051873418 (10.644925616761762, 11)
loss 364.90850830078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 361.9095764160156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 364.65972900390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 366.436767578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 363.01885986328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 367.4137268066406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 362.9778137207031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 365.0286560058594
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 1 20.0 919.871906942 (10.370942817486826, 10)
loss 367.879638671875
Current State,action,reward,Response time,Next State:  (10, 10.370942817486826) 3 19.0 927.777654938 (10.42733414151318, 11)
loss 364.2337341308594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 362.02099609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 365.7225036621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 361.76446533203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 365.6247863769531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 366.82257080078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 363.1567687988281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 365.4200439453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 362.5987243652344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 361.9652099609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 367.8118896484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 364.6286926269531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 367.58221435546875
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 4 19.0 909.008683798 (10.369891240151098, 11)
loss 367.3284606933594
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 2 19.0 916.069372847 (10.316955310454549, 11)
loss 364.0528259277344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 363.7214050292969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 363.5727233886719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 365.3740539550781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 364.0394287109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 365.8987731933594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 361.9251708984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 362.10821533203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 362.14404296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 367.5890197753906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 363.78179931640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 363.2576904296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 366.3747253417969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 363.56744384765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 367.3222961425781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 364.4638977050781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 363.5091857910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 364.21533203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 367.1363830566406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 361.1426696777344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 365.4913330078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 363.70611572265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 366.3551025390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 362.28363037109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 362.3296813964844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 361.82696533203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 369.0069580078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 365.1176452636719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 365.37469482421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 366.77850341796875
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 2 19.0 1387.23260634 (19.340464848017284, 11)
loss 361.466064453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 365.0119323730469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 362.85894775390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 366.9027404785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 363.0922546386719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 364.0821228027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 364.9809875488281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 364.8250732421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 362.64453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 365.8088684082031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 363.0318908691406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 360.55767822265625
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 0 21.0 1258.07554888 (16.237094554670044, 9)
loss 367.4096374511719
Current State,action,reward,Response time,Next State:  (9, 16.237094554670044) 3 20.0 1273.04930988 (15.950694610794756, 10)
loss 364.1116027832031
Current State,action,reward,Response time,Next State:  (10, 15.950694610794756) 3 19.0 1223.7505224 (15.828704162850809, 11)
loss 366.5180969238281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 363.7628479003906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 361.41693115234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 364.10162353515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 367.0533142089844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 366.09417724609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 362.224365234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 362.7070617675781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 365.66656494140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 364.0229797363281
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 4 19.0 1214.51137704 (15.947547279389703, 11)
loss 363.779296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 362.46429443359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 362.9852600097656
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 4 19.0 1221.34827555 (16.229253414601111, 11)
loss 362.1312255859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 368.8765563964844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 366.1759948730469
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 1 20.0 1248.87152463 (16.836383524612351, 10)
loss 360.39764404296875
Current State,action,reward,Response time,Next State:  (10, 16.836383524612351) 3 19.0 1270.73108663 (16.845818065953559, 11)
loss 365.8215026855469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 365.06463623046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 364.1674499511719
############ Running episode number: 186  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 364.9572448730469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 363.68328857421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 362.8770446777344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 368.28515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 365.111328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 364.25579833984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 364.2008361816406
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 0 21.0 950.798097136 (10.995673623987257, 9)
loss 364.84912109375
Current State,action,reward,Response time,Next State:  (9, 10.995673623987257) 3 20.0 998.686790566 (10.931193889570471, 10)
loss 363.5511474609375
Current State,action,reward,Response time,Next State:  (10, 10.931193889570471) 3 19.0 957.495664348 (10.816918347608043, 11)
loss 367.2081604003906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 365.57440185546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 366.146728515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 362.5099182128906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 367.0260009765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 363.3209533691406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 366.1005859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 365.1279296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 366.99810791015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 365.7547607421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 364.6415100097656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 362.1098937988281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 363.542236328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 363.43145751953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 364.2316589355469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 365.6627197265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 363.8046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 364.6904602050781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 365.3748474121094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 363.80487060546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 365.1273498535156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 364.6514587402344
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 4 19.0 912.67468196 (10.24826025489064, 11)
loss 364.5530090332031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 366.35675048828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 364.6019592285156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 364.2049865722656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 363.310546875
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 4 19.0 916.069372847 (10.316955310454549, 11)
loss 361.2536315917969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 367.7353820800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 366.2547302246094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 367.9067687988281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 366.29437255859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 366.573486328125
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 1 20.0 931.154858096 (10.624473674922116, 10)
loss 365.33355712890625
Current State,action,reward,Response time,Next State:  (10, 10.624473674922116) 3 19.0 941.225969064 (10.771376986314287, 11)
loss 364.27691650390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 363.5422668457031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 366.9037780761719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 361.7766418457031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 364.18414306640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 364.2559814453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 367.03814697265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 363.1896057128906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 367.19268798828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 363.90960693359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 364.175048828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 361.77294921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 361.33056640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 365.5461120605469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 363.26226806640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 367.1325988769531
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 1 20.0 1259.63387034 (17.534967586021782, 10)
loss 365.1097106933594
Current State,action,reward,Response time,Next State:  (10, 17.534967586021782) 3 19.0 1307.78684385 (17.669285735563751, 11)
loss 362.03662109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 367.90435791015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 364.9837341308594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 364.12591552734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 362.89910888671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 363.7488098144531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 362.41644287109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 363.4403076171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 362.2682189941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 364.58984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 361.66168212890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 363.0208740234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 366.6629943847656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 364.75653076171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 363.74072265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 362.8571472167969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 364.1565246582031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 363.63543701171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 363.5207824707031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 360.77838134765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 364.3109130859375
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 2 19.0 1204.52470225 (15.550833128512703, 11)
loss 367.0133056640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 364.28118896484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 363.48541259765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 362.863037109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 365.26788330078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 368.1317138671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 360.78814697265625
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 4 19.0 1211.18755114 (16.004586266677634, 11)
loss 364.3285217285156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 364.16583251953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 362.69500732421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 363.9538269042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 364.9497985839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 361.0455017089844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 365.6526184082031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 362.2607116699219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 362.2206115722656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 365.7882995605469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 365.2198486328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 362.9340515136719
############ Running episode number: 187  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 361.5172119140625
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 2 19.0 1000.80824137 (11.786394321941378, 11)
loss 364.7270202636719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 363.1842041015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 366.4637756347656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 363.7176818847656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 365.85821533203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 362.18731689453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 362.3729553222656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 363.7127380371094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 362.1192321777344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 363.6282043457031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 363.3896789550781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 362.098388671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 363.4268493652344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 363.0738525390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 361.1865539550781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 362.36163330078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 369.4645080566406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 364.8650207519531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 365.0544738769531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 366.1228942871094
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 1 20.0 919.871906942 (10.370942817486826, 10)
loss 365.31854248046875
Current State,action,reward,Response time,Next State:  (10, 10.370942817486826) 3 19.0 927.777654938 (10.42733414151318, 11)
loss 359.8335876464844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 365.1666259765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 365.1171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 366.9903259277344
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 1 20.0 913.381595845 (10.30224719189987, 10)
loss 364.4602355957031
Current State,action,reward,Response time,Next State:  (10, 10.30224719189987) 3 19.0 924.133757854 (10.278181486298042, 11)
loss 364.54168701171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 363.0211181640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 364.33294677734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 363.24945068359375
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 0 21.0 912.67468196 (10.24826025489064, 9)
loss 366.95501708984375
Current State,action,reward,Response time,Next State:  (9, 10.24826025489064) 3 20.0 959.563389179 (10.276491935146446, 10)
loss 364.868408203125
Current State,action,reward,Response time,Next State:  (10, 10.276491935146446) 3 19.0 922.767593645 (10.236991269871366, 11)
loss 366.0818786621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 366.1996154785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 365.7720947265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 363.6829833984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 364.6394958496094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 362.1015319824219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 364.83941650390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 364.70562744140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 365.249755859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 363.1586608886719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 361.3908386230469
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 0 21.0 937.284736847 (10.924797168745895, 9)
loss 366.2723693847656
Current State,action,reward,Response time,Next State:  (9, 10.924797168745895) 4 19.0 994.97675791 (11.039747673816453, 11)
loss 367.8209533691406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 361.7865295410156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 361.8690185546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 363.31878662109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 362.28045654296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 364.1294860839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 366.82086181640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 361.867431640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 365.0626525878906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 362.56494140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 362.6228942871094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 364.13787841796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 361.8190002441406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 363.53131103515625
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 0 21.0 1259.63387034 (17.534967586021782, 9)
loss 364.6778869628906
Current State,action,reward,Response time,Next State:  (9, 17.534967586021782) 3 20.0 1340.98655806 (17.669285735563751, 10)
loss 363.3789367675781
Current State,action,reward,Response time,Next State:  (10, 17.669285735563751) 3 19.0 1314.91162813 (17.944480812078613, 11)
loss 366.4007873535156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 366.2510070800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 361.962890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 362.1197814941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 369.96685791015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 363.2225341796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 365.7531433105469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 363.0036926269531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 360.1467590332031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 364.815185546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 362.96014404296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 364.3866271972656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 363.94964599609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 363.2877197265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 361.3116149902344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 362.2938537597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 364.0512390136719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 365.00274658203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 363.0205078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 362.9091491699219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 361.6952819824219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 366.5340576171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 361.0929870605469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 363.19146728515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 365.3838195800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 364.34869384765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 363.07952880859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 361.95965576171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 366.0498046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 360.0843505859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 364.6620788574219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 363.2867736816406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 365.077880859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 363.6813659667969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 362.685546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 366.1730651855469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 362.6598815917969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 367.2164306640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 366.5152587890625
############ Running episode number: 188  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 364.16534423828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 364.4114685058594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 363.64642333984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 362.9469909667969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 364.1851806640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 364.8424987792969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 364.7015686035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 361.900146484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 366.5695495605469
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 4 19.0 945.729803224 (10.816918347608043, 11)
loss 362.461181640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 364.3819580078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 361.7481689453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 365.59613037109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 363.89886474609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 364.219970703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 364.9306640625
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 2 19.0 925.738299342 (10.553846649940214, 11)
loss 361.71917724609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 365.743896484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 361.42315673828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 360.97900390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 361.9530334472656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 362.07989501953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 364.6888732910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 361.5624084472656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 362.2436218261719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 361.8702697753906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 364.7901916503906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 365.92120361328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 365.3398742675781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 363.9119873046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 364.61444091796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 367.0170593261719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 362.96234130859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 365.86077880859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 363.38958740234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 361.6433410644531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 362.4919738769531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 362.6264343261719
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 2 19.0 914.152581784 (10.390165524255663, 11)
loss 362.44061279296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 364.7773742675781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 365.1330871582031
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 0 21.0 925.376677007 (10.655373370049301, 9)
loss 366.45989990234375
Current State,action,reward,Response time,Next State:  (9, 10.655373370049301) 3 20.0 980.873751654 (10.624473674922116, 10)
loss 365.6247863769531
Current State,action,reward,Response time,Next State:  (10, 10.624473674922116) 3 19.0 941.225969064 (10.771376986314287, 11)
loss 363.20086669921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 364.1763000488281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 362.64337158203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 362.5624084472656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 364.19622802734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 363.01300048828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 362.9654235839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 362.4655456542969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 364.3359680175781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 363.01837158203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 365.5079650878906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 364.0008544921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 363.25665283203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 361.6744079589844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 362.742919921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 363.1576843261719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 365.3540344238281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 364.8203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 360.0065002441406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 362.9886779785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 365.3032531738281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 363.218994140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 363.02099609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 364.30712890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 364.7307434082031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 362.7305908203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 363.15948486328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 365.6384582519531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 364.26556396484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 363.5405578613281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 364.171142578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 360.8563232421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 365.373046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 364.8443298339844
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 2 19.0 1278.56065924 (16.84211602880065, 11)
loss 362.8983154296875
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 0 21.0 1258.07554888 (16.237094554670044, 9)
loss 365.4248962402344
Current State,action,reward,Response time,Next State:  (9, 16.237094554670044) 3 20.0 1273.04930988 (15.950694610794756, 10)
loss 362.568115234375
Current State,action,reward,Response time,Next State:  (10, 15.950694610794756) 3 19.0 1223.7505224 (15.828704162850809, 11)
loss 361.1606750488281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 363.9034118652344
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 1 20.0 1189.84140354 (15.446694946204717, 10)
loss 363.7630920410156
Current State,action,reward,Response time,Next State:  (10, 15.446694946204717) 3 19.0 1197.01631782 (15.750501603468638, 11)
loss 364.3212890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 362.1064147949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 363.15960693359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 362.2442626953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 364.29290771484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 361.4265441894531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 367.3039245605469
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 0 21.0 1214.51137704 (15.947547279389703, 9)
loss 363.9877014160156
Current State,action,reward,Response time,Next State:  (9, 15.947547279389703) 3 20.0 1257.89293893 (16.11465619633363, 10)
loss 362.2717590332031
Current State,action,reward,Response time,Next State:  (10, 16.11465619633363) 3 19.0 1232.44771583 (16.147078378791146, 11)
loss 361.9556884765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 367.5217590332031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 363.88104248046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 359.64886474609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 363.91943359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 363.1375427246094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 364.4397888183594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 364.60260009765625
############ Running episode number: 189  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 363.8514709472656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 362.860107421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 363.0105285644531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 361.01123046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 366.0015869140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 365.55999755859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 368.3951110839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 364.3941955566406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 363.83056640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 366.82379150390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 362.63165283203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 362.749755859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 364.5788269042969
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 4 19.0 937.318160694 (10.644925616761762, 11)
loss 366.60284423828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 364.5372619628906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 363.6954650878906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 362.5252990722656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 363.7515869140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 364.10955810546875
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 0 21.0 920.244245637 (10.433149880183072, 9)
loss 364.587646484375
Current State,action,reward,Response time,Next State:  (9, 10.433149880183072) 3 20.0 969.241448633 (10.44185150623065, 10)
loss 365.65576171875
Current State,action,reward,Response time,Next State:  (10, 10.44185150623065) 3 19.0 931.538941947 (10.370942817486826, 11)
loss 362.3553771972656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 366.2192687988281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 360.7742614746094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 367.09991455078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 366.3766784667969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 365.49273681640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 366.7314147949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 364.3206787109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 366.7230224609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 362.6775207519531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 359.3326416015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 363.8929748535156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 363.6735534667969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 363.08355712890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 366.9537353515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 365.0697326660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 362.9248046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 366.675537109375
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 4 19.0 917.140709305 (10.425974763084863, 11)
loss 368.0036926269531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 365.32354736328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 365.2471618652344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 362.565673828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 362.85308837890625
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 2 19.0 937.284736847 (10.924797168745895, 11)
loss 368.02264404296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 361.6853942871094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 365.3592834472656
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 1 20.0 963.716106332 (11.670334358779868, 10)
loss 362.6072998046875
Current State,action,reward,Response time,Next State:  (10, 11.670334358779868) 3 19.0 996.702699398 (11.819721938468785, 11)
loss 366.4910888671875
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 4 19.0 992.681522335 (12.19918626616789, 11)
loss 363.251220703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 361.13653564453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 364.9483947753906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 363.7256164550781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 366.4035339355469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 364.5013732910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 361.9942932128906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 363.888916015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 364.17926025390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 362.3179016113281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 362.8203430175781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 362.497314453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 362.73663330078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 360.91058349609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 360.92059326171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 365.6429138183594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 365.8503723144531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 364.1590881347656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 363.80718994140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 362.4582214355469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 366.828369140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 366.3154296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 362.1080627441406
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 1 20.0 1385.62570908 (19.08360399753829, 10)
loss 365.6974182128906
Current State,action,reward,Response time,Next State:  (10, 19.08360399753829) 3 19.0 1389.93285614 (18.668181536495972, 11)
loss 363.2047424316406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 361.4520568847656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 367.17034912109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 364.1388854980469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 368.46142578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 366.2092590332031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 362.35858154296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 364.7191162109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 361.667236328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 364.5149841308594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 362.4740905761719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 363.42169189453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 361.3708190917969
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 0 21.0 1204.59090422 (15.892373986997768, 9)
loss 362.2552185058594
Current State,action,reward,Response time,Next State:  (9, 15.892373986997768) 3 20.0 1255.00488935 (15.954793861767499, 10)
loss 363.6116027832031
Current State,action,reward,Response time,Next State:  (10, 15.954793861767499) 3 19.0 1223.96796344 (16.004586266677634, 11)
loss 364.8296813964844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 363.0758056640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 365.0926513671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 363.1549377441406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 363.270263671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 365.4941711425781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 364.63189697265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 365.0779113769531
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 2 19.0 1248.87152463 (16.836383524612351, 11)
loss 365.2713928222656
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 2 19.0 1257.77263112 (16.845818065953559, 11)
loss 363.9794921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 363.8834228515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 361.9241027832031
############ Running episode number: 190  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 363.96868896484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 362.8667907714844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 364.7065124511719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 362.627197265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 364.103515625
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 1 20.0 967.160346038 (11.25610796929319, 10)
loss 363.35845947265625
Current State,action,reward,Response time,Next State:  (10, 11.25610796929319) 3 19.0 974.730436685 (11.027107764209074, 11)
loss 363.2469177246094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 366.71832275390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 364.55963134765625
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 4 19.0 945.729803224 (10.816918347608043, 11)
loss 364.33453369140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 359.06536865234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 363.3947448730469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 366.5960998535156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 366.1982116699219
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 4 19.0 930.602776506 (10.58735855349979, 11)
loss 362.5429382324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 359.630615234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 365.3180236816406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 369.80743408203125
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 0 21.0 922.369964659 (10.448897752470936, 9)
loss 363.14715576171875
Current State,action,reward,Response time,Next State:  (9, 10.448897752470936) 0 23.0 970.065772031 (10.433149880183072, 7)
loss 367.5240783691406
Current State,action,reward,Response time,Next State:  (7, 10.433149880183072) 3 22.0 1025.98894457 (10.44185150623065, 8)
loss 364.1809387207031
Current State,action,reward,Response time,Next State:  (8, 10.44185150623065) 1 23.0 981.315350702 (10.370942817486826, 7)
loss 364.28045654296875
Current State,action,reward,Response time,Next State:  (7, 10.370942817486826) 3 22.0 1022.11240377 (10.42733414151318, 8)
loss 362.36187744140625
Current State,action,reward,Response time,Next State:  (8, 10.42733414151318) 3 21.0 980.466886297 (10.388469398680568, 9)
loss 363.2462158203125
Current State,action,reward,Response time,Next State:  (9, 10.388469398680568) 3 20.0 966.902645924 (10.344006106602812, 10)
loss 362.83587646484375
Current State,action,reward,Response time,Next State:  (10, 10.344006106602812) 1 21.0 926.348821567 (10.319026962956018, 9)
loss 363.6951904296875
Current State,action,reward,Response time,Next State:  (9, 10.319026962956018) 3 20.0 963.267677113 (10.30224719189987, 10)
loss 364.1385192871094
Current State,action,reward,Response time,Next State:  (10, 10.30224719189987) 3 19.0 924.133757854 (10.278181486298042, 11)
loss 364.1535949707031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 365.409912109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 365.25592041015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 362.9410095214844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 363.6248779296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 364.3215637207031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 363.1080017089844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 365.96124267578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 365.5036926269531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 369.0018310546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 363.0865173339844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 364.53619384765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 361.62890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 362.29486083984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 361.1832275390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 366.4755554199219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 361.34112548828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 365.06524658203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 361.9925842285156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 365.2455749511719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 361.2212829589844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 365.4208068847656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 362.03472900390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 368.1195983886719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 363.33331298828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 364.6507568359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 365.2838134765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 366.03497314453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 365.71710205078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 361.56597900390625
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 1 20.0 1204.9600972 (16.466876895473597, 10)
loss 365.5587158203125
Current State,action,reward,Response time,Next State:  (10, 16.466876895473597) 3 19.0 1251.130943 (16.871606159345866, 11)
loss 366.04241943359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 362.19097900390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 363.89544677734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 361.6504821777344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 362.50714111328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 365.1563415527344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 363.4499816894531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 365.3200378417969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 361.9412536621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 367.9185791015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 364.1231994628906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 363.86383056640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 363.2273254394531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 361.3078918457031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 363.4046630859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 366.190673828125
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 1 20.0 1354.56874896 (18.375894992990247, 10)
loss 364.5777587890625
Current State,action,reward,Response time,Next State:  (10, 18.375894992990247) 3 19.0 1352.39307459 (17.82724819986867, 11)
loss 361.6863098144531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 361.8230285644531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 365.3321838378906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 365.6170959472656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 364.82049560546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 362.5686950683594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 370.4377136230469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 365.91668701171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 361.5806884765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 365.78240966796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 364.9173278808594
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 0 21.0 1204.59090422 (15.892373986997768, 9)
loss 362.7535400390625
Current State,action,reward,Response time,Next State:  (9, 15.892373986997768) 3 20.0 1255.00488935 (15.954793861767499, 10)
loss 365.2065124511719
Current State,action,reward,Response time,Next State:  (10, 15.954793861767499) 3 19.0 1223.96796344 (16.004586266677634, 11)
loss 366.6973571777344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 365.4598083496094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 363.3873291015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 363.52020263671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 367.2516174316406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 364.56427001953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 363.9501647949219
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 4 19.0 1229.17115431 (16.667936385136993, 11)
loss 362.7121887207031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 362.90875244140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 364.0608825683594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 364.0326232910156
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 4 19.0 1269.21706044 (17.215992726625572, 11)
loss 364.012939453125
############ Running episode number: 191  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 363.49462890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 361.39520263671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 363.60406494140625
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 2 19.0 982.049698353 (11.469111876584304, 11)
loss 362.8995361328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 365.26544189453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 364.50323486328125
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 2 19.0 962.898956888 (11.027107764209074, 11)
loss 363.0343933105469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 363.51959228515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 364.4710388183594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 364.4811096191406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 363.2757263183594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 361.7425842285156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 363.69378662109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 364.9467468261719
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 1 20.0 930.602776506 (10.58735855349979, 10)
loss 365.5504150390625
Current State,action,reward,Response time,Next State:  (10, 10.58735855349979) 3 19.0 939.257231149 (10.552868829802469, 11)
loss 367.5716247558594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 363.0957946777344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 362.30877685546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 362.51141357421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 360.25714111328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 362.8971252441406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 361.5535583496094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 365.13507080078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 361.7135925292969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 363.4626159667969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 362.9128723144531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 363.9827880859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 361.21380615234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 366.1742858886719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 363.00726318359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 363.8462219238281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 360.56451416015625
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 2 19.0 909.642131904 (10.276491935146446, 11)
loss 363.0029602050781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 367.3712158203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 362.46319580078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 367.4769592285156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 367.80914306640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 361.6875305175781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 362.7862854003906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 360.9010009765625
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 0 21.0 919.032945938 (10.546025383098053, 9)
loss 362.09356689453125
Current State,action,reward,Response time,Next State:  (9, 10.546025383098053) 3 20.0 975.14992417 (10.655373370049301, 10)
loss 365.5680236816406
Current State,action,reward,Response time,Next State:  (10, 10.655373370049301) 3 19.0 942.865015335 (10.624473674922116, 11)
loss 364.41204833984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 365.6087341308594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 363.03582763671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 364.7964782714844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 363.479736328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 363.96185302734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 361.7822570800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 362.3743591308594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 365.370361328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 364.97705078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 362.53485107421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 363.02362060546875
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 4 19.0 1122.88439768 (14.677479537099185, 11)
loss 362.0682373046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 365.94403076171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 362.5498046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 363.2379150390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 361.51739501953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 364.93646240234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 363.90496826171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 363.0054016113281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 365.0631408691406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 363.9942321777344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 366.94854736328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 363.79425048828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 366.09149169921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 363.0635070800781
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 0 21.0 1383.38281107 (19.140765783401285, 9)
loss 362.30206298828125
Current State,action,reward,Response time,Next State:  (9, 19.140765783401285) 3 20.0 1425.04216908 (19.385636054792762, 10)
loss 363.8469543457031
Current State,action,reward,Response time,Next State:  (10, 19.385636054792762) 1 21.0 1405.95387237 (19.223969507401588, 9)
loss 366.24749755859375
Current State,action,reward,Response time,Next State:  (9, 19.223969507401588) 3 20.0 1429.39747342 (19.25591252280865, 10)
loss 363.84637451171875
Current State,action,reward,Response time,Next State:  (10, 19.25591252280865) 3 19.0 1399.0728054 (19.08360399753829, 11)
loss 363.09906005859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 362.8448486328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 365.0692443847656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 361.8401184082031
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 0 21.0 1310.13203606 (17.229782241685768, 9)
loss 363.5314636230469
Current State,action,reward,Response time,Next State:  (9, 17.229782241685768) 3 20.0 1325.01161138 (16.84211602880065, 10)
loss 367.69183349609375
Current State,action,reward,Response time,Next State:  (10, 16.84211602880065) 3 19.0 1271.03516211 (16.237094554670044, 11)
loss 364.19293212890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 365.17950439453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 365.3830261230469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 364.2179870605469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 363.367919921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 363.58978271484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 363.1905822753906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 362.8433837890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 362.3337097167969
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 1 20.0 1207.88915169 (15.954793861767499, 10)
loss 361.012939453125
Current State,action,reward,Response time,Next State:  (10, 15.954793861767499) 3 19.0 1223.96796344 (16.004586266677634, 11)
loss 362.24798583984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 364.72857666015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 363.9067077636719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 363.26385498046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 362.9508056640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 361.826416015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 367.56353759765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 365.7428283691406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 364.8270568847656
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 0 21.0 1257.77263112 (16.845818065953559, 9)
loss 365.10516357421875
Current State,action,reward,Response time,Next State:  (9, 16.845818065953559) 3 20.0 1304.91298164 (17.052961248403161, 10)
loss 366.6149597167969
Current State,action,reward,Response time,Next State:  (10, 17.052961248403161) 3 19.0 1282.21925533 (17.215992726625572, 11)
loss 362.24224853515625
############ Running episode number: 192  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 362.799560546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 365.8871154785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 363.40740966796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 362.6146545410156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 362.6123962402344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 365.87298583984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 364.46343994140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 364.5780944824219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 364.30499267578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 364.35400390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 363.3365783691406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 366.6436767578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 366.58721923828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 365.5861511230469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 362.83935546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 362.5200500488281
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 1 20.0 925.738299342 (10.553846649940214, 10)
loss 367.740478515625
Current State,action,reward,Response time,Next State:  (10, 10.553846649940214) 3 19.0 937.479622653 (10.489125480251131, 11)
loss 365.541015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 368.7419738769531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 364.59552001953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 366.7240295410156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 362.93121337890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 362.9100036621094
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 4 19.0 919.104778396 (10.388469398680568, 11)
loss 367.0905456542969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 363.25811767578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 369.247802734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 366.47271728515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 362.1103515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 364.1423034667969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 363.22882080078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 365.6280517578125
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 2 19.0 912.67468196 (10.24826025489064, 11)
loss 363.96832275390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 365.52520751953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 365.11163330078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 365.8800048828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 362.8001403808594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 362.1970520019531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 361.59814453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 367.3011169433594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 367.203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 362.46173095703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 362.1842346191406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 365.1871032714844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 364.4820251464844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 360.3498840332031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 364.2577209472656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 362.0678405761719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 362.1324768066406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 361.5033264160156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 362.92138671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 362.735595703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 364.0080871582031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 362.7858581542969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 363.5639343261719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 366.7772521972656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 364.0409240722656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 365.0483093261719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 363.90020751953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 362.41632080078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 363.5931701660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 367.4878234863281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 367.4236755371094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 363.1913146972656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 365.1340637207031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 362.1257629394531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 364.8304443359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 365.536865234375
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 0 21.0 1390.09363446 (19.213467265587269, 9)
loss 366.8527526855469
Current State,action,reward,Response time,Next State:  (9, 19.213467265587269) 3 20.0 1428.84773289 (19.140765783401285, 10)
loss 363.9476318359375
Current State,action,reward,Response time,Next State:  (10, 19.140765783401285) 3 19.0 1392.96495117 (19.385636054792762, 11)
loss 363.2971496582031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 366.25885009765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 365.5335388183594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 362.0255126953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 366.1241455078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 362.9278259277344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 363.8735656738281
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 4 19.0 1310.13203606 (17.229782241685768, 11)
loss 361.37139892578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 364.5933837890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 368.1315612792969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 364.7789306640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 362.2888488769531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 366.7164001464844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 362.1337585449219
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 4 19.0 1184.33851965 (15.750501603468638, 11)
loss 363.845703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 363.4061584472656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 363.7415466308594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 361.9288330078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 363.30072021484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 365.1853332519531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 365.6167297363281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 362.260986328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 370.2126159667969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 362.8544921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 362.23162841796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 367.00494384765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 362.94207763671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 362.783203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 364.68719482421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 363.243896484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 362.9430236816406
############ Running episode number: 193  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 361.72003173828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 361.7921447753906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 361.19207763671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 364.5435791015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 363.5467224121094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 364.88018798828125
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 2 19.0 962.898956888 (11.027107764209074, 11)
loss 363.7974853515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 365.7281799316406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 365.67694091796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 366.2231140136719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 360.6294860839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 364.8337097167969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 361.1640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 361.74951171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 365.7814025878906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 362.6423034667969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 362.22015380859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 360.71148681640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 361.8235778808594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 363.0589599609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 364.854248046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 363.6595458984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 364.6189880371094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 363.1415100097656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 361.45855712890625
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 4 19.0 914.701547126 (10.319026962956018, 11)
loss 364.46649169921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 362.8695373535156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 364.16973876953125
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 2 19.0 911.223233653 (10.268274366284802, 11)
loss 363.841796875
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 4 19.0 910.69972028 (10.335411397720526, 11)
loss 364.60809326171875
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 1 20.0 914.247384359 (10.305649118067803, 10)
loss 370.8764343261719
Current State,action,reward,Response time,Next State:  (10, 10.305649118067803) 3 19.0 924.314209939 (10.24826025489064, 11)
loss 367.0267333984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 365.43646240234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 362.0145263671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 366.7031555175781
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 0 21.0 909.008683798 (10.369891240151098, 9)
loss 360.3561706542969
Current State,action,reward,Response time,Next State:  (9, 10.369891240151098) 3 20.0 965.930171009 (10.316955310454549, 10)
loss 361.6934814453125
Current State,action,reward,Response time,Next State:  (10, 10.316955310454549) 3 19.0 924.913936648 (10.333617326102203, 11)
loss 362.943603515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 361.3663635253906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 362.5325622558594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 363.5172424316406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 365.0238037109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 362.8640441894531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 366.9178466796875
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 1 20.0 937.284736847 (10.924797168745895, 10)
loss 362.0467224121094
Current State,action,reward,Response time,Next State:  (10, 10.924797168745895) 3 19.0 957.1563561 (11.039747673816453, 11)
loss 365.138916015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 364.32891845703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 363.4352111816406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 362.5256042480469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 365.1605224609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 367.8644714355469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 363.9768981933594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 363.3087463378906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 364.8748474121094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 363.6453857421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 365.2050476074219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 361.0368957519531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 364.1206359863281
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 1 20.0 1238.24711194 (16.871606159345866, 10)
loss 362.78173828125
Current State,action,reward,Response time,Next State:  (10, 16.871606159345866) 3 19.0 1272.5994393 (17.534967586021782, 11)
loss 365.0804748535156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 364.65838623046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 363.8494567871094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 366.0364685058594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 364.7431335449219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 361.0299377441406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 362.9306640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 364.30633544921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 364.29608154296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 367.00799560546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 360.31243896484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 362.5447082519531
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 4 19.0 1383.93777195 (19.25591252280865, 11)
loss 362.22125244140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 364.6904296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 367.013427734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 360.33245849609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 363.4466857910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 363.052978515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 360.6637268066406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 363.4626159667969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 363.3835144042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 363.8913879394531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 361.059814453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 364.838623046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 365.25286865234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 366.1474914550781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 362.83416748046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 366.1419372558594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 361.1799621582031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 363.44073486328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 363.08697509765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 362.16082763671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 365.6317138671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 364.4023132324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 365.7269592285156
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 2 19.0 1225.69057988 (16.295120821876548, 11)
loss 363.7901916503906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 360.0765686035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 363.37420654296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 365.3885192871094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 360.9962158203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 362.5007019042969
############ Running episode number: 194  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 2 20.0 1029.06659875 (11.973514343585284, 10)
loss 365.08831787109375
Current State,action,reward,Response time,Next State:  (10, 11.973514343585284) 3 19.0 1012.7846064 (11.786394321941378, 11)
loss 363.0371398925781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 365.8643493652344
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 1 20.0 982.049698353 (11.469111876584304, 10)
loss 361.7109375
Current State,action,reward,Response time,Next State:  (10, 11.469111876584304) 3 19.0 986.02903554 (11.336751742492702, 11)
loss 365.0826416015625
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 1 20.0 967.160346038 (11.25610796929319, 10)
loss 362.077392578125
Current State,action,reward,Response time,Next State:  (10, 11.25610796929319) 2 20.0 974.730436685 (11.027107764209074, 10)
loss 362.0822448730469
Current State,action,reward,Response time,Next State:  (10, 11.027107764209074) 3 19.0 962.583328739 (10.995673623987257, 11)
loss 362.59832763671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 363.55450439453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 363.1859130859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 364.1591796875
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 1 20.0 939.812260006 (10.768325938188134, 10)
loss 361.85308837890625
Current State,action,reward,Response time,Next State:  (10, 10.768325938188134) 2 20.0 948.856481751 (10.772009508959538, 10)
loss 367.9705810546875
Current State,action,reward,Response time,Next State:  (10, 10.772009508959538) 3 19.0 949.051873418 (10.644925616761762, 11)
loss 363.8263244628906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 362.17218017578125
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 4 19.0 927.560809977 (10.552868829802469, 11)
loss 362.3562316894531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 362.6177062988281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 366.72979736328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 365.0372009277344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 362.93170166015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 361.91064453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 365.5643005371094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 364.11236572265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 362.0589904785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 363.6766357421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 367.3898010253906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 368.99859619140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 364.90673828125
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 2 19.0 911.223233653 (10.268274366284802, 11)
loss 363.7781066894531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 363.4116516113281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 363.7964782714844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 364.6329345703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 360.1878356933594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 365.37030029296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 362.71746826171875
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 2 19.0 909.008683798 (10.369891240151098, 11)
loss 364.00048828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 364.60162353515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 363.44696044921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 362.56768798828125
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 0 21.0 917.140709305 (10.425974763084863, 9)
loss 363.3485412597656
Current State,action,reward,Response time,Next State:  (9, 10.425974763084863) 3 20.0 968.865866662 (10.546025383098053, 10)
loss 364.9283447265625
Current State,action,reward,Response time,Next State:  (10, 10.546025383098053) 3 19.0 937.064750655 (10.655373370049301, 11)
loss 361.80047607421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 363.2228698730469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 364.1355285644531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 363.806884765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 364.3510437011719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 362.6749572753906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 362.53106689453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 362.1713562011719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 364.6742858886719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 361.76141357421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 361.0813293457031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 364.5326843261719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 365.4386291503906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 362.3000183105469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 368.37359619140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 362.47442626953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 364.4620056152344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 360.3089294433594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 361.494873046875
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 0 21.0 1294.6873044 (17.669285735563751, 9)
loss 363.7913513183594
Current State,action,reward,Response time,Next State:  (9, 17.669285735563751) 3 20.0 1348.01745033 (17.944480812078613, 10)
loss 360.77972412109375
Current State,action,reward,Response time,Next State:  (10, 17.944480812078613) 3 19.0 1329.50910109 (18.385807405229915, 11)
loss 363.2089538574219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 363.3990173339844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 364.4682312011719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 363.6822204589844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 362.72003173828125
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 2 19.0 1390.09363446 (19.213467265587269, 11)
loss 361.321533203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 365.6387634277344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 365.48272705078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 364.2424621582031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 364.109130859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 361.77203369140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 362.96484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 360.983642578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 362.7428283691406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 363.2976379394531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 364.785888671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 362.78173828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 362.3734130859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 362.3851318359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 363.9001159667969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 363.98193359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 362.9861145019531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 360.56982421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 362.9126892089844
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 1 20.0 1204.59090422 (15.892373986997768, 10)
loss 360.78173828125
Current State,action,reward,Response time,Next State:  (10, 15.892373986997768) 3 19.0 1220.65695786 (15.954793861767499, 11)
loss 364.2090759277344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 366.8192138671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 363.8187561035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 364.0294189453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 361.4976501464844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 364.0358581542969
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 1 20.0 1221.34827555 (16.229253414601111, 10)
loss 363.3625183105469
Current State,action,reward,Response time,Next State:  (10, 16.229253414601111) 3 19.0 1238.52642122 (16.295120821876548, 11)
loss 365.64141845703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 366.8248291015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 363.77557373046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 360.8092346191406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 361.4353942871094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 365.5066833496094
############ Running episode number: 195  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 360.3382873535156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 362.3684387207031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 363.8965148925781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 364.4649658203125
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 4 19.0 974.154538113 (11.336751742492702, 11)
loss 364.4302978515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 361.1976623535156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 362.4500427246094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 361.9359130859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 362.3992614746094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 361.2950439453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 359.98065185546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 361.455322265625
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 1 20.0 937.12351295 (10.772009508959538, 10)
loss 363.71038818359375
Current State,action,reward,Response time,Next State:  (10, 10.772009508959538) 3 19.0 949.051873418 (10.644925616761762, 11)
loss 364.0115966796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 359.6383056640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 362.755126953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 365.380859375
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 2 19.0 925.789969445 (10.489125480251131, 11)
loss 364.4936218261719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 361.1275634765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 361.6380920410156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 363.2819519042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 361.10321044921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 364.0075378417969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 362.8194274902344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 365.0679931640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 362.1391906738281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 362.001708984375
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 2 19.0 912.494916918 (10.278181486298042, 11)
loss 364.756591796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 363.9494934082031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 364.8761291503906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 363.3747863769531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 361.9650573730469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 362.842041015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 363.7439270019531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 363.3985900878906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 360.79486083984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 359.9477233886719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 365.8458251953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 363.8465576171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 363.5335693359375
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 0 21.0 919.032945938 (10.546025383098053, 9)
loss 365.4746398925781
Current State,action,reward,Response time,Next State:  (9, 10.546025383098053) 3 20.0 975.14992417 (10.655373370049301, 10)
loss 362.82086181640625
Current State,action,reward,Response time,Next State:  (10, 10.655373370049301) 3 19.0 942.865015335 (10.624473674922116, 11)
loss 366.98272705078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 363.0314025878906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 364.0057373046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 362.6680908203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 361.6268005371094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 366.41015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 361.518798828125
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 4 19.0 992.681522335 (12.19918626616789, 11)
loss 361.85406494140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 363.3162536621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 360.8706970214844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 364.7735290527344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 361.4720764160156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 363.7237548828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 365.072509765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 362.62347412109375
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 2 19.0 1204.9600972 (16.466876895473597, 11)
loss 362.81298828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 363.2901306152344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 361.63055419921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 360.86358642578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 359.9140930175781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 364.4789733886719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 361.8101501464844
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 2 19.0 1354.73183582 (19.02839494033929, 11)
loss 362.1455078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 364.4379577636719
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 1 20.0 1387.23260634 (19.340464848017284, 10)
loss 363.64312744140625
Current State,action,reward,Response time,Next State:  (10, 19.340464848017284) 3 19.0 1403.55780672 (19.213467265587269, 11)
loss 363.8162841796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 364.1815185546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 363.2529296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 366.39654541015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 363.24859619140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 364.54620361328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 363.9415588378906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 365.9676818847656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 365.9521789550781
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 1 20.0 1310.13203606 (17.229782241685768, 10)
loss 362.2349548339844
Current State,action,reward,Response time,Next State:  (10, 17.229782241685768) 3 19.0 1291.59856437 (16.84211602880065, 11)
loss 362.4164733886719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 362.8045959472656
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 2 19.0 1226.10492247 (15.950694610794756, 11)
loss 363.77239990234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 362.53765869140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 360.91351318359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 361.4090881347656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 362.9976501464844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 362.9009094238281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 362.0525207519531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 363.4866943359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 365.1078186035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 362.8150939941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 363.0150451660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 362.6395568847656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 362.5227966308594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 362.8042297363281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 364.0318298339844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 363.4855651855469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 363.81915283203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 362.8825988769531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 364.11517333984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 361.5995178222656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 364.3535461425781
############ Running episode number: 196  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 363.6622314453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 360.943359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 364.0816955566406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 363.9551696777344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 363.369140625
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 2 19.0 967.160346038 (11.25610796929319, 11)
loss 365.0202941894531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 362.1067810058594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 362.7396240234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 363.3900146484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 363.95928955078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 362.28985595703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 361.4400329589844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 362.3577575683594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 362.390380859375
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 1 20.0 930.602776506 (10.58735855349979, 10)
loss 362.74761962890625
Current State,action,reward,Response time,Next State:  (10, 10.58735855349979) 3 19.0 939.257231149 (10.552868829802469, 11)
loss 362.92626953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 361.3049011230469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 363.0761413574219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 360.93896484375
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 1 20.0 920.244245637 (10.433149880183072, 10)
loss 363.38720703125
Current State,action,reward,Response time,Next State:  (10, 10.433149880183072) 3 19.0 931.077372094 (10.44185150623065, 11)
loss 364.44366455078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 363.25927734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 364.9930114746094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 363.7979736328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 362.9266357421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 367.73956298828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 365.1333312988281
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 1 20.0 912.494916918 (10.278181486298042, 10)
loss 364.1239929199219
Current State,action,reward,Response time,Next State:  (10, 10.278181486298042) 3 19.0 922.857214352 (10.268274366284802, 11)
loss 362.93121337890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 363.41583251953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 363.7838439941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 364.9063720703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 365.29620361328125
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 2 19.0 911.133954163 (10.236991269871366, 11)
loss 364.3766174316406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 361.4476318359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 362.7679443359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 361.1217346191406
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 1 20.0 913.272125304 (10.333617326102203, 10)
loss 361.5352783203125
Current State,action,reward,Response time,Next State:  (10, 10.333617326102203) 3 19.0 925.797758139 (10.390165524255663, 11)
loss 360.3953857421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 362.1456604003906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 363.15777587890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 363.3340759277344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 363.0453186035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 363.714111328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 362.6443786621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 365.3083801269531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 363.1384582519531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 363.8746032714844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 365.732421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 362.0730895996094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 362.67724609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 365.8587951660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 363.03192138671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 363.5063781738281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 362.22259521484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 366.0343017578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 363.2817077636719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 363.4888000488281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 361.5897521972656
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 0 21.0 1259.63387034 (17.534967586021782, 9)
loss 363.07281494140625
Current State,action,reward,Response time,Next State:  (9, 17.534967586021782) 3 20.0 1340.98655806 (17.669285735563751, 10)
loss 361.9427185058594
Current State,action,reward,Response time,Next State:  (10, 17.669285735563751) 3 19.0 1314.91162813 (17.944480812078613, 11)
loss 361.8340759277344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 362.290283203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 366.0279541015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 363.783935546875
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 1 20.0 1373.60319427 (19.286321916040979, 10)
loss 363.608642578125
Action +2 not possible so Scaled up by 1
Current State,action,reward,Response time,Next State:  (10, 19.286321916040979) 4 19.0 1400.68584406 (19.340464848017284, 11)
loss 363.3508605957031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 362.34344482421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 364.5637512207031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 361.6693115234375
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 0 21.0 1392.48057747 (19.223969507401588, 9)
loss 369.9410705566406
Current State,action,reward,Response time,Next State:  (9, 19.223969507401588) 3 20.0 1429.39747342 (19.25591252280865, 10)
loss 362.4179992675781
Current State,action,reward,Response time,Next State:  (10, 19.25591252280865) 3 19.0 1399.0728054 (19.08360399753829, 11)
loss 360.95245361328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 361.760986328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 365.63720703125
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 4 19.0 1339.12370397 (17.82724819986867, 11)
loss 363.95147705078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 364.0790100097656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 363.10626220703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 361.7489013671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 360.8050231933594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 361.574462890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 362.49237060546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 363.01318359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 361.7294616699219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 366.4886779785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 361.5360107421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 365.5342102050781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 367.88140869140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 362.046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 362.40087890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 361.962890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 363.3907470703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 362.1099548339844
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 2 19.0 1221.34827555 (16.229253414601111, 11)
loss 365.6468811035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 361.9261779785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 362.9314880371094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 364.8526306152344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 362.8760681152344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 364.2264709472656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 363.1472473144531
############ Running episode number: 197  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 361.742919921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 365.9775695800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 363.42041015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 365.4264221191406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 361.98992919921875
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 0 21.0 967.160346038 (11.25610796929319, 9)
loss 362.5786437988281
Current State,action,reward,Response time,Next State:  (9, 11.25610796929319) 3 20.0 1012.3192433 (11.027107764209074, 10)
loss 364.3905334472656
Current State,action,reward,Response time,Next State:  (10, 11.027107764209074) 3 19.0 962.583328739 (10.995673623987257, 11)
loss 363.502197265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 366.0348815917969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 361.1959228515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 362.61663818359375
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 0 21.0 939.812260006 (10.768325938188134, 9)
loss 362.577392578125
Current State,action,reward,Response time,Next State:  (9, 10.768325938188134) 3 20.0 986.786261176 (10.772009508959538, 10)
loss 364.0746154785156
Current State,action,reward,Response time,Next State:  (10, 10.772009508959538) 2 20.0 949.051873418 (10.644925616761762, 10)
loss 361.7897033691406
Current State,action,reward,Response time,Next State:  (10, 10.644925616761762) 3 19.0 942.310823749 (10.58735855349979, 11)
loss 361.3124694824219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 360.8670349121094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 364.60009765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 367.2218322753906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 362.21282958984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 362.45587158203125
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 2 19.0 919.412094444 (10.44185150623065, 11)
loss 363.23675537109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 363.8688049316406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 361.0658264160156
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 1 20.0 919.104778396 (10.388469398680568, 10)
loss 364.4234924316406
Current State,action,reward,Response time,Next State:  (10, 10.388469398680568) 3 19.0 928.707336523 (10.344006106602812, 11)
loss 363.8201904296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 363.9770202636719
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 0 21.0 913.381595845 (10.30224719189987, 9)
loss 363.9465026855469
Current State,action,reward,Response time,Next State:  (9, 10.30224719189987) 3 20.0 962.389338906 (10.278181486298042, 10)
loss 363.8299560546875
Current State,action,reward,Response time,Next State:  (10, 10.278181486298042) 3 19.0 922.857214352 (10.268274366284802, 11)
loss 368.6250915527344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 366.3131103515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 361.6951904296875
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 0 21.0 912.67468196 (10.24826025489064, 9)
loss 364.84515380859375
Current State,action,reward,Response time,Next State:  (9, 10.24826025489064) 1 22.0 959.563389179 (10.276491935146446, 8)
loss 364.3179016113281
Current State,action,reward,Response time,Next State:  (8, 10.276491935146446) 3 21.0 971.650944469 (10.236991269871366, 9)
loss 364.1805419921875
Current State,action,reward,Response time,Next State:  (9, 10.236991269871366) 3 20.0 958.973513426 (10.236272697871373, 10)
loss 365.248779296875
Current State,action,reward,Response time,Next State:  (10, 10.236272697871373) 3 19.0 920.634200723 (10.369891240151098, 11)
loss 365.94110107421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 362.9834289550781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 363.8460998535156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 365.7751770019531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 362.950439453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 361.2313232421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 364.95166015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 364.9925537109375
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 0 21.0 929.522052234 (10.771376986314287, 9)
loss 362.8030090332031
Current State,action,reward,Response time,Next State:  (9, 10.771376986314287) 3 20.0 986.945968488 (10.924797168745895, 10)
loss 360.86572265625
Current State,action,reward,Response time,Next State:  (10, 10.924797168745895) 3 19.0 957.1563561 (11.039747673816453, 11)
loss 362.44696044921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 362.4143981933594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 362.7518615722656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 363.082275390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 363.229736328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 368.968017578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 363.29046630859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 360.7747497558594
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 4 19.0 1089.37925646 (14.283719188889453, 11)
loss 365.0042724609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 362.7566833496094
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 1 20.0 1143.69153516 (15.353965082180355, 10)
loss 365.2734069824219
Current State,action,reward,Response time,Next State:  (10, 15.353965082180355) 3 19.0 1192.09754638 (15.836943704090487, 11)
loss 361.632080078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 365.82537841796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 363.3502502441406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 361.4569091796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 364.2022705078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 369.226806640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 361.6988830566406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 361.8538818359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 365.50341796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 362.38043212890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 364.0471496582031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 365.4903564453125
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 2 19.0 1383.38281107 (19.140765783401285, 11)
loss 361.1685485839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 363.1736755371094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 361.93975830078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 366.13800048828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 363.2972106933594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 361.8293151855469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 362.59283447265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 367.14727783203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 362.7225341796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 363.3849182128906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 361.517333984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 364.90899658203125
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 1 20.0 1210.97093797 (15.828704162850809, 10)
loss 363.6361389160156
Current State,action,reward,Response time,Next State:  (10, 15.828704162850809) 1 21.0 1217.27964986 (15.550833128512703, 9)
loss 362.2819519042969
Current State,action,reward,Response time,Next State:  (9, 15.550833128512703) 3 20.0 1237.12691092 (15.446694946204717, 10)
loss 360.9190368652344
Current State,action,reward,Response time,Next State:  (10, 15.446694946204717) 3 19.0 1197.01631782 (15.750501603468638, 11)
loss 363.4923400878906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 365.4175109863281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 361.44287109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 363.85980224609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 364.38507080078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 364.64569091796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 363.3893127441406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 364.51165771484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 363.3018493652344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 360.5784912109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 364.5308532714844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 362.6977233886719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 362.2572021484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 361.3994140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 365.9023132324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 363.99725341796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 366.18951416015625
############ Running episode number: 198  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 361.823974609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 363.89483642578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 363.7523498535156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 364.4920959472656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 359.50360107421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 362.982421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 361.52056884765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 360.75933837890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 364.8809509277344
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 2 19.0 945.729803224 (10.816918347608043, 11)
loss 362.8669128417969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 365.06243896484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 365.0616455078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 362.1090393066406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 361.7381896972656
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 2 19.0 930.602776506 (10.58735855349979, 11)
loss 363.74322509765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 367.47344970703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 363.0032043457031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 363.3651428222656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 360.7657165527344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 364.8974914550781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 361.3460998535156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 365.89874267578125
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 0 21.0 916.124940439 (10.42733414151318, 9)
loss 366.939208984375
Current State,action,reward,Response time,Next State:  (9, 10.42733414151318) 3 20.0 968.937023414 (10.388469398680568, 10)
loss 363.37261962890625
Current State,action,reward,Response time,Next State:  (10, 10.388469398680568) 2 20.0 928.707336523 (10.344006106602812, 10)
loss 364.2578430175781
Current State,action,reward,Response time,Next State:  (10, 10.344006106602812) 3 19.0 926.348821567 (10.319026962956018, 11)
loss 364.3584899902344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 363.72161865234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 363.1212463378906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 363.5295715332031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 361.6273498535156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 360.9292907714844
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 1 20.0 912.67468196 (10.24826025489064, 10)
loss 362.88531494140625
Current State,action,reward,Response time,Next State:  (10, 10.24826025489064) 3 19.0 921.2700698 (10.276491935146446, 11)
loss 364.7984619140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 361.0464172363281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 363.5254821777344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 361.63775634765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 364.5765380859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 366.7590637207031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 363.97088623046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 363.74639892578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 367.9892883300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 362.93804931640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 362.0094299316406
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 0 21.0 929.522052234 (10.771376986314287, 9)
loss 364.2003173828125
Current State,action,reward,Response time,Next State:  (9, 10.771376986314287) 1 22.0 986.945968488 (10.924797168745895, 8)
loss 363.3243103027344
Current State,action,reward,Response time,Next State:  (8, 10.924797168745895) 3 21.0 1009.54101094 (11.039747673816453, 9)
loss 361.7052307128906
Current State,action,reward,Response time,Next State:  (9, 11.039747673816453) 3 20.0 1000.99384957 (11.271571944085663, 10)
loss 367.9400634765625
Current State,action,reward,Response time,Next State:  (10, 11.271571944085663) 3 19.0 975.550709187 (11.670334358779868, 11)
loss 364.30462646484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 365.12103271484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 363.63818359375
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 4 19.0 1012.73322757 (12.501496275411796, 11)
loss 363.76708984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 364.2236633300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 365.341796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 364.544189453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 362.78021240234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 361.3079528808594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 365.0433654785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 363.6739501953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 364.6421813964844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 364.352294921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 360.9189147949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 362.5906982421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 363.7365417480469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 363.7174987792969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 362.29425048828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 365.0071716308594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 366.57177734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 364.032958984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 363.75537109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 362.8600769042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 363.7500305175781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 364.6923522949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 364.3168029785156
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 4 19.0 1376.52055872 (18.668181536495972, 11)
loss 363.1410827636719
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 1 20.0 1354.56874896 (18.375894992990247, 10)
loss 363.81915283203125
Current State,action,reward,Response time,Next State:  (10, 18.375894992990247) 3 19.0 1352.39307459 (17.82724819986867, 11)
loss 360.5927429199219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 361.4510192871094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 364.2066345214844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 361.86407470703125
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 4 19.0 1226.10492247 (15.950694610794756, 11)
loss 364.49774169921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 360.8663024902344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 368.52703857421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 363.4280700683594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 364.7165832519531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 366.190185546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 361.2527160644531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 363.6001892089844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 366.14239501953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 365.9008483886719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 361.6348571777344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 361.44384765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 364.18206787109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 364.5758972167969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 365.89666748046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 363.8531799316406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 365.169921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 363.1656188964844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 363.04058837890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 365.7848205566406
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 2 19.0 1269.21706044 (17.215992726625572, 11)
loss 361.0313720703125
############ Running episode number: 199  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 363.4352111816406
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 1 20.0 1000.80824137 (11.786394321941378, 10)
loss 362.60614013671875
Current State,action,reward,Response time,Next State:  (10, 11.786394321941378) 3 19.0 1002.85899476 (11.61852219546234, 11)
loss 364.943115234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 363.2488708496094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 363.5578918457031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 362.8085021972656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 367.7497863769531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 364.4429016113281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 364.36383056640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 364.5837097167969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 363.26318359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 362.36346435546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 363.0686340332031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 364.1519775390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 360.6309509277344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 364.28564453125
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 4 19.0 925.738299342 (10.553846649940214, 11)
loss 362.7018127441406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 363.2044372558594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 366.424560546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 360.0773620605469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 363.9330749511719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 364.6855163574219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 367.0101623535156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 362.2044677734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 365.07733154296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 360.191650390625
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 4 19.0 913.381595845 (10.30224719189987, 11)
loss 364.25726318359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 364.86297607421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 363.3055114746094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 362.9159851074219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 362.3743896484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 362.81341552734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 361.8566589355469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 361.0080871582031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 366.89495849609375
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 2 19.0 909.008683798 (10.369891240151098, 11)
loss 363.6117858886719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 363.2894287109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 364.2334289550781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 363.6387634277344
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 4 19.0 917.140709305 (10.425974763084863, 11)
loss 361.7469482421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 366.03094482421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 363.81805419921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 367.4244689941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 364.3638610839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 363.1335144042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 365.7580261230469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 361.9967346191406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 362.1330871582031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 364.2409362792969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 365.2485656738281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 363.13958740234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 365.2770080566406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 363.77471923828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 364.0981140136719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 362.11627197265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 363.07421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 364.7277526855469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 359.45721435546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 363.65185546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 363.32073974609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 362.6255187988281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 363.6480407714844
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 1 20.0 1316.32685758 (18.385807405229915, 10)
loss 366.3891906738281
Current State,action,reward,Response time,Next State:  (10, 18.385807405229915) 3 19.0 1352.9188695 (18.671267839956315, 11)
loss 365.58428955078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 361.0672607421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 363.8449401855469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 362.8318786621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 368.5052490234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 364.4990539550781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 362.0628662109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 364.07110595703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 362.873046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 363.6926574707031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 362.8023681640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 362.6358337402344
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 4 19.0 1339.12370397 (17.82724819986867, 11)
loss 366.9625244140625
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 2 19.0 1310.13203606 (17.229782241685768, 11)
loss 363.17755126953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 364.868896484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 367.10308837890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 362.86248779296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 365.4043884277344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 364.2507019042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 363.3398742675781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 362.14117431640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 361.5514831542969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 362.6197509765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 363.0160827636719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 360.4094543457031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 360.3392639160156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 364.0736999511719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 361.6760559082031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 360.3135070800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 362.70855712890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 360.0770263671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 362.2134094238281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 368.34173583984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 363.0667419433594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 365.08489990234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 363.8498229980469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 363.6325988769531
############ Running episode number: 200  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 363.0997619628906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 363.9893493652344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 363.95526123046875
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 1 20.0 982.049698353 (11.469111876584304, 10)
loss 369.279541015625
Current State,action,reward,Response time,Next State:  (10, 11.469111876584304) 2 20.0 986.02903554 (11.336751742492702, 10)
loss 364.9309997558594
Current State,action,reward,Response time,Next State:  (10, 11.336751742492702) 3 19.0 979.00811241 (11.25610796929319, 11)
loss 364.7151184082031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 364.977783203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 363.1375427246094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 362.2953796386719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 368.35272216796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 362.71240234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 362.38006591796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 362.966796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 363.4327392578125
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 2 19.0 930.602776506 (10.58735855349979, 11)
loss 361.5993957519531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 362.32916259765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 362.69036865234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 364.4992370605469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 366.82989501953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 362.71636962890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 361.6488342285156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 362.55718994140625
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 2 19.0 916.124940439 (10.42733414151318, 11)
loss 361.82305908203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 362.03558349609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 363.36761474609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 361.73162841796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 363.9004821777344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 363.6778259277344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 361.1690673828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 362.2291564941406
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 4 19.0 914.247384359 (10.305649118067803, 11)
loss 363.48504638671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 362.06622314453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 363.6399230957031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 360.35418701171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 361.5118713378906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 363.7683410644531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 360.455078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 361.424072265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 364.3614807128906
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 0 21.0 917.140709305 (10.425974763084863, 9)
loss 364.9948425292969
Current State,action,reward,Response time,Next State:  (9, 10.425974763084863) 3 20.0 968.865866662 (10.546025383098053, 10)
loss 363.9067687988281
Current State,action,reward,Response time,Next State:  (10, 10.546025383098053) 3 19.0 937.064750655 (10.655373370049301, 11)
loss 364.93182373046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 361.7271728515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 362.1476135253906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 364.764892578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 362.50347900390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 363.1150207519531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 361.717529296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 365.3996887207031
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 1 20.0 992.681522335 (12.19918626616789, 10)
loss 364.5625915527344
Current State,action,reward,Response time,Next State:  (10, 12.19918626616789) 3 19.0 1024.75516863 (12.501496275411796, 11)
loss 361.7779235839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 365.251953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 360.33575439453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 363.641357421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 365.9234924316406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 363.3475036621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 362.7757873535156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 361.7024230957031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 364.0459289550781
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 1 20.0 1259.63387034 (17.534967586021782, 10)
loss 365.31781005859375
Current State,action,reward,Response time,Next State:  (10, 17.534967586021782) 3 19.0 1307.78684385 (17.669285735563751, 11)
loss 362.1390686035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 363.8185119628906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 361.5973815917969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 363.3126525878906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 363.3643798828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 362.4633483886719
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 4 19.0 1387.23260634 (19.340464848017284, 11)
loss 365.76190185546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 362.832275390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 361.8191223144531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 362.2881774902344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 364.80999755859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 366.9464111328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 361.1315612792969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 365.28680419921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 364.8475341796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 364.4818420410156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 360.7920227050781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 361.5295104980469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 364.831298828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 361.32440185546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 361.5602722167969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 364.7861328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 365.2872619628906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 362.89581298828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 359.5003967285156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 362.3802795410156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 365.4461364746094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 365.44366455078125
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 1 20.0 1211.18755114 (16.004586266677634, 10)
loss 362.47515869140625
Current State,action,reward,Response time,Next State:  (10, 16.004586266677634) 3 19.0 1226.60915635 (16.017694914042416, 11)
loss 362.80706787109375
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 0 21.0 1214.51137704 (15.947547279389703, 9)
loss 362.47662353515625
Current State,action,reward,Response time,Next State:  (9, 15.947547279389703) 3 20.0 1257.89293893 (16.11465619633363, 10)
loss 362.05181884765625
Current State,action,reward,Response time,Next State:  (10, 16.11465619633363) 3 19.0 1232.44771583 (16.147078378791146, 11)
loss 361.7047424316406
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 1 20.0 1221.34827555 (16.229253414601111, 10)
loss 362.48748779296875
Current State,action,reward,Response time,Next State:  (10, 16.229253414601111) 3 19.0 1238.52642122 (16.295120821876548, 11)
loss 363.1430358886719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 363.5242614746094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 363.1064453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 363.64202880859375
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 4 19.0 1258.27117243 (17.052961248403161, 11)
loss 361.00933837890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 363.085693359375
############ Running episode number: 201  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 361.80615234375
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 4 19.0 1000.80824137 (11.786394321941378, 11)
loss 364.489501953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 361.6881103515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 362.42230224609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 362.1096496582031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 364.72503662109375
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 1 20.0 962.898956888 (11.027107764209074, 10)
loss 362.25128173828125
Current State,action,reward,Response time,Next State:  (10, 11.027107764209074) 3 19.0 962.583328739 (10.995673623987257, 11)
loss 362.5572204589844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 364.8932189941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 363.6684265136719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 364.14642333984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 361.3356628417969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 361.87664794921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 365.3955078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 362.09088134765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 363.534912109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 363.0274353027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 365.0652770996094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 363.0460205078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 363.260986328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 361.95672607421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 363.09967041015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 363.70281982421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 362.65435791015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 364.1671447753906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 363.8722839355469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 365.84063720703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 361.1610107421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 362.5721740722656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 363.5602722167969
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 4 19.0 914.247384359 (10.305649118067803, 11)
loss 361.19464111328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 362.1856384277344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 363.2810363769531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 363.1438293457031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 361.6387023925781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 362.69451904296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 366.74078369140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 364.3123474121094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 362.6336669921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 360.37579345703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 361.8953552246094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 360.88726806640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 361.0558776855469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 365.5735778808594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 363.7250671386719
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 4 19.0 945.391786838 (11.039747673816453, 11)
loss 364.2259826660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 361.4562072753906
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 2 19.0 963.716106332 (11.670334358779868, 11)
loss 364.434326171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 360.8272705078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 363.99176025390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 365.68304443359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 360.9456787109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 361.3013916015625
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 1 20.0 1089.37925646 (14.283719188889453, 10)
loss 362.95257568359375
Current State,action,reward,Response time,Next State:  (10, 14.283719188889453) 3 19.0 1135.32732476 (14.677479537099185, 11)
loss 360.427734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 364.3428955078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 361.9338684082031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 362.1172790527344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 362.896728515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 364.8035583496094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 361.5870666503906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 364.0427551269531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 363.6277160644531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 364.2393798828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 360.7174072265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 362.7636413574219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 361.2518005371094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 361.3739013671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 364.0623474121094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 363.4872741699219
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 0 21.0 1392.48057747 (19.223969507401588, 9)
loss 363.2028503417969
Current State,action,reward,Response time,Next State:  (9, 19.223969507401588) 0 23.0 1429.39747342 (19.25591252280865, 7)
loss 364.62109375
Current State,action,reward,Response time,Next State:  (7, 19.25591252280865) 3 22.0 1575.79467084 (19.08360399753829, 8)
loss 363.30389404296875
Current State,action,reward,Response time,Next State:  (8, 19.08360399753829) 3 21.0 1486.3808035 (18.668181536495972, 9)
loss 360.2946472167969
Current State,action,reward,Response time,Next State:  (9, 18.668181536495972) 3 20.0 1400.30471596 (18.375894992990247, 10)
loss 363.0048828125
Current State,action,reward,Response time,Next State:  (10, 18.375894992990247) 3 19.0 1352.39307459 (17.82724819986867, 11)
loss 368.49932861328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 364.8517761230469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 362.5709228515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 366.5992736816406
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 1 20.0 1226.10492247 (15.950694610794756, 10)
loss 363.7442321777344
Current State,action,reward,Response time,Next State:  (10, 15.950694610794756) 3 19.0 1223.7505224 (15.828704162850809, 11)
loss 363.0640869140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 364.25738525390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 362.4151611328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 361.3096923828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 363.7889709472656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 362.44921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 365.2060852050781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 363.0138244628906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 364.1449890136719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 362.3872375488281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 363.330322265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 363.0364685058594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 361.1048583984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 363.294189453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 362.7844543457031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 364.9393005371094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 362.10943603515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 365.16253662109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 364.48590087890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 362.9977722167969
############ Running episode number: 202  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 363.1729431152344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 362.9933166503906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 365.2608947753906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 364.82879638671875
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 4 19.0 974.154538113 (11.336751742492702, 11)
loss 363.3862609863281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 361.49822998046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 362.1324768066406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 363.2575378417969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 363.66217041015625
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 4 19.0 945.729803224 (10.816918347608043, 11)
loss 361.3398742675781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 365.4730529785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 367.55328369140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 361.14117431640625
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 1 20.0 937.318160694 (10.644925616761762, 10)
loss 364.1031494140625
Current State,action,reward,Response time,Next State:  (10, 10.644925616761762) 3 19.0 942.310823749 (10.58735855349979, 11)
loss 364.3692321777344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 364.2964782714844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 360.41619873046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 360.67852783203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 365.45220947265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 363.66162109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 361.6619567871094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 363.21160888671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 362.61907958984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 363.19268798828125
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 0 21.0 917.051082408 (10.344006106602812, 9)
loss 363.85186767578125
Current State,action,reward,Response time,Next State:  (9, 10.344006106602812) 3 20.0 964.575212011 (10.319026962956018, 10)
loss 362.7794494628906
Current State,action,reward,Response time,Next State:  (10, 10.319026962956018) 3 19.0 925.023825574 (10.30224719189987, 11)
loss 361.0088806152344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 364.1495361328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 362.45751953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 364.2684326171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 362.5428161621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 363.50543212890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 365.5769348144531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 364.5169982910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 360.5527648925781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 362.3507080078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 362.3284606933594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 361.1247253417969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 362.9356689453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 364.6595153808594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 361.07073974609375
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 2 19.0 925.376677007 (10.655373370049301, 11)
loss 363.9218444824219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 363.6697692871094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 361.76348876953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 360.9691162109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 361.9923095703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 362.02490234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 360.5018310546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 362.9538269042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 359.9499206542969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 363.8498229980469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 366.1399841308594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 361.647216796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 361.821044921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 365.58935546875
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 0 21.0 1143.69153516 (15.353965082180355, 9)
loss 363.9437561035156
Current State,action,reward,Response time,Next State:  (9, 15.353965082180355) 3 20.0 1226.82184023 (15.836943704090487, 10)
loss 362.68304443359375
Current State,action,reward,Response time,Next State:  (10, 15.836943704090487) 3 19.0 1217.71670884 (16.466876895473597, 11)
loss 364.0374450683594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 361.5599670410156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 363.5615234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 362.4737548828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 364.44122314453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 362.8290710449219
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 2 19.0 1339.64749699 (18.671267839956315, 11)
loss 363.641845703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 363.2207946777344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 362.15277099609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 362.4151306152344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 363.5093078613281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 364.06866455078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 361.6890869140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 364.38629150390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 363.13134765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 362.5635070800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 363.9110412597656
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 1 20.0 1354.56874896 (18.375894992990247, 10)
loss 363.80096435546875
Current State,action,reward,Response time,Next State:  (10, 18.375894992990247) 3 19.0 1352.39307459 (17.82724819986867, 11)
loss 363.6297607421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 359.9770812988281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 364.9976501464844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 363.1601257324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 361.98486328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 358.6702575683594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 362.5491943359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 362.70361328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 359.7633361816406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 366.1971740722656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 364.1907653808594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 365.159912109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 361.8900146484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 365.29962158203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 361.9165954589844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 361.9067077636719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 361.3990173339844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 360.41497802734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 361.64202880859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 360.59954833984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 367.1778564453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 361.6686706542969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 361.1100158691406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 360.49835205078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 362.3243408203125
############ Running episode number: 203  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 364.52850341796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 364.38812255859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 362.8858642578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 364.0508728027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 362.89068603515625
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 4 19.0 967.160346038 (11.25610796929319, 11)
loss 361.27362060546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 364.39910888671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 363.1796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 360.55902099609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 365.1648254394531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 363.96160888671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 362.4404296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 360.6767883300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 362.3404235839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 363.3359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 361.3518371582031
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 1 20.0 925.738299342 (10.553846649940214, 10)
loss 364.8346252441406
Current State,action,reward,Response time,Next State:  (10, 10.553846649940214) 3 19.0 937.479622653 (10.489125480251131, 11)
loss 362.2181396484375
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 1 20.0 922.369964659 (10.448897752470936, 10)
loss 363.0378723144531
Current State,action,reward,Response time,Next State:  (10, 10.448897752470936) 3 19.0 931.912703681 (10.433149880183072, 11)
loss 360.0914306640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 362.96673583984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 361.582763671875
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 1 20.0 916.124940439 (10.42733414151318, 10)
loss 362.9508056640625
Current State,action,reward,Response time,Next State:  (10, 10.42733414151318) 3 19.0 930.768881517 (10.388469398680568, 11)
loss 363.5635681152344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 362.3504638671875
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 1 20.0 914.701547126 (10.319026962956018, 10)
loss 360.7492980957031
Current State,action,reward,Response time,Next State:  (10, 10.319026962956018) 3 19.0 925.023825574 (10.30224719189987, 11)
loss 364.3118591308594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 362.613037109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 364.6468811035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 363.6914367675781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 361.8304443359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 360.7312927246094
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 4 19.0 909.642131904 (10.276491935146446, 11)
loss 362.068115234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 362.6235046386719
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 2 19.0 909.046654676 (10.236272697871373, 11)
loss 360.829833984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 363.78387451171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 363.9927978515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 364.5576477050781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 360.4715576171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 364.4234924316406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 364.09674072265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 362.1973876953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 363.039306640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 362.3994140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 366.9837341308594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 361.6432800292969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 361.0240783691406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 362.4828796386719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 362.38702392578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 362.1831970214844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 361.2548828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 363.82403564453125
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 0 21.0 1063.96010023 (13.649658108197247, 9)
loss 364.8891906738281
Current State,action,reward,Response time,Next State:  (9, 13.649658108197247) 3 20.0 1137.6097809 (14.283719188889453, 10)
loss 363.44085693359375
Current State,action,reward,Response time,Next State:  (10, 14.283719188889453) 3 19.0 1135.32732476 (14.677479537099185, 11)
loss 366.9292297363281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 364.2904968261719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 360.8885498046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 365.0672607421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 363.0978088378906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 363.6553649902344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 362.2649841308594
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 2 19.0 1301.78496219 (17.944480812078613, 11)
loss 361.9266357421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 364.26458740234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 364.25445556640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 364.2588195800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 362.8605651855469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 362.58319091796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 361.1592712402344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 360.71734619140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 362.17205810546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 363.1507873535156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 364.8094177246094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 362.3493347167969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 365.3338928222656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 363.6939392089844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 365.98175048828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 362.7027282714844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 363.2532043457031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 363.0968322753906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 366.6260986328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 363.0052490234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 362.7011413574219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 362.74505615234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 362.5615539550781
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 1 20.0 1200.39231205 (15.817158911312735, 10)
loss 366.10284423828125
Current State,action,reward,Response time,Next State:  (10, 15.817158911312735) 3 19.0 1216.66724247 (15.829956988360925, 11)
loss 362.68304443359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 362.9100646972656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 363.1861267089844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 364.0205993652344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 361.06475830078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 364.49700927734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 363.23211669921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 362.30413818359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 363.4057312011719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 363.970458984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 363.44488525390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 362.42584228515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 364.024169921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 361.61676025390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 361.18896484375
############ Running episode number: 204  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 361.1665344238281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 362.8471374511719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 362.5487976074219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 363.2252197265625
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 1 20.0 974.154538113 (11.336751742492702, 10)
loss 360.2983703613281
Current State,action,reward,Response time,Next State:  (10, 11.336751742492702) 0 22.0 979.00811241 (11.25610796929319, 8)
loss 360.6157531738281
Current State,action,reward,Response time,Next State:  (8, 11.25610796929319) 3 21.0 1028.90440276 (11.027107764209074, 9)
loss 362.25927734375
Current State,action,reward,Response time,Next State:  (9, 11.027107764209074) 3 20.0 1000.33221268 (10.995673623987257, 10)
loss 367.3928527832031
Current State,action,reward,Response time,Next State:  (10, 10.995673623987257) 0 22.0 960.915933313 (10.931193889570471, 8)
loss 365.5113830566406
Current State,action,reward,Response time,Next State:  (8, 10.931193889570471) 3 21.0 1009.91486598 (10.816918347608043, 9)
loss 364.9915466308594
Current State,action,reward,Response time,Next State:  (9, 10.816918347608043) 3 20.0 989.329834005 (10.819208572963639, 10)
loss 362.85760498046875
Current State,action,reward,Response time,Next State:  (10, 10.819208572963639) 3 19.0 951.555504911 (10.768325938188134, 11)
loss 362.80328369140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 362.53631591796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 365.5843811035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 363.5582580566406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 364.5019226074219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 361.9496765136719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 363.11077880859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 362.498291015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 366.27813720703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 364.9152526855469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 361.4540710449219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 365.3357849121094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 362.2477111816406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 362.4524841308594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 361.3314208984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 361.7226867675781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 361.83758544921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 363.2735595703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 364.7635192871094
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 0 21.0 914.247384359 (10.305649118067803, 9)
loss 362.93280029296875
Current State,action,reward,Response time,Next State:  (9, 10.305649118067803) 3 20.0 962.567412952 (10.24826025489064, 10)
loss 364.5041198730469
Current State,action,reward,Response time,Next State:  (10, 10.24826025489064) 3 19.0 921.2700698 (10.276491935146446, 11)
loss 362.8502502441406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 367.668701171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 364.5511779785156
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 4 19.0 909.008683798 (10.369891240151098, 11)
loss 364.30096435546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 364.6512756347656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 364.69366455078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 362.96929931640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 364.7508239746094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 360.7098388671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 365.4024963378906
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 2 19.0 931.154858096 (10.624473674922116, 11)
loss 363.70367431640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 361.5752868652344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 361.0318298339844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 362.41839599609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 363.99981689453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 366.8169860839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 366.82733154296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 362.7876892089844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 367.03143310546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 363.03045654296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 363.1788024902344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 363.7087707519531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 363.9974365234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 362.2024841308594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 367.1774597167969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 365.3565368652344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 361.1852722167969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 361.060791015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 364.5472412109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 360.50909423828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 363.19061279296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 363.85858154296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 364.3219299316406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 360.5950927734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 364.057861328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 362.6204528808594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 365.1717224121094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 360.62939453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 361.274658203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 364.6609191894531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 360.27099609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 363.43048095703125
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 2 19.0 1354.56874896 (18.375894992990247, 11)
loss 362.0241394042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 363.5975646972656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 361.9443664550781
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 0 21.0 1278.56065924 (16.84211602880065, 9)
loss 361.4907531738281
Current State,action,reward,Response time,Next State:  (9, 16.84211602880065) 3 20.0 1304.71919827 (16.237094554670044, 10)
loss 364.3564453125
Current State,action,reward,Response time,Next State:  (10, 16.237094554670044) 3 19.0 1238.94234737 (15.950694610794756, 11)
loss 365.9092102050781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 365.0883483886719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 363.3079833984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 367.0335693359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 360.56787109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 364.2776794433594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 363.4324951171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 362.03033447265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 365.078125
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 4 19.0 1211.18755114 (16.004586266677634, 11)
loss 363.7246398925781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 362.7764892578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 363.2309265136719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 365.8308410644531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 361.3295593261719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 367.88787841796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 362.6773681640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 363.0753173828125
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 0 21.0 1248.87152463 (16.836383524612351, 9)
loss 364.8708801269531
Current State,action,reward,Response time,Next State:  (9, 16.836383524612351) 3 20.0 1304.41912996 (16.845818065953559, 10)
loss 364.65228271484375
Current State,action,reward,Response time,Next State:  (10, 16.845818065953559) 3 19.0 1271.23153331 (17.052961248403161, 11)
loss 366.0323791503906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 366.0657043457031
############ Running episode number: 205  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 361.1675109863281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 364.68048095703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 360.2201843261719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 361.0872802734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 360.6189880371094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 362.01513671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 364.78472900390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 364.0472412109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 361.5069274902344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 362.5661926269531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 362.90399169921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 363.76043701171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 364.7005920410156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 362.02630615234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 363.0990905761719
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 0 21.0 927.560809977 (10.552868829802469, 9)
loss 363.5317687988281
Current State,action,reward,Response time,Next State:  (9, 10.552868829802469) 3 20.0 975.508144832 (10.553846649940214, 10)
loss 363.6856384277344
Current State,action,reward,Response time,Next State:  (10, 10.553846649940214) 3 19.0 937.479622653 (10.489125480251131, 11)
loss 364.2814636230469
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 1 20.0 922.369964659 (10.448897752470936, 10)
loss 362.2219543457031
Current State,action,reward,Response time,Next State:  (10, 10.448897752470936) 3 19.0 931.912703681 (10.433149880183072, 11)
loss 361.9601745605469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 362.2626647949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 364.84600830078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 362.173095703125
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 0 21.0 919.104778396 (10.388469398680568, 9)
loss 363.7411804199219
Current State,action,reward,Response time,Next State:  (9, 10.388469398680568) 3 20.0 966.902645924 (10.344006106602812, 10)
loss 362.879638671875
Current State,action,reward,Response time,Next State:  (10, 10.344006106602812) 3 19.0 926.348821567 (10.319026962956018, 11)
loss 364.61669921875
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 2 19.0 913.381595845 (10.30224719189987, 11)
loss 362.14093017578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 362.40740966796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 363.0572814941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 361.5391540527344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 364.26458740234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 362.38330078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 363.63360595703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 363.74658203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 362.0451354980469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 361.54486083984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 365.5805358886719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 366.1082763671875
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 0 21.0 914.152581784 (10.390165524255663, 9)
loss 363.0782470703125
Current State,action,reward,Response time,Next State:  (9, 10.390165524255663) 3 20.0 966.991429728 (10.425974763084863, 10)
loss 363.3966064453125
Current State,action,reward,Response time,Next State:  (10, 10.425974763084863) 3 19.0 930.696774523 (10.546025383098053, 11)
loss 363.809326171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 366.7931823730469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 363.4427490234375
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 0 21.0 929.522052234 (10.771376986314287, 9)
loss 362.59918212890625
Current State,action,reward,Response time,Next State:  (9, 10.771376986314287) 3 20.0 986.945968488 (10.924797168745895, 10)
loss 362.0291442871094
Current State,action,reward,Response time,Next State:  (10, 10.924797168745895) 3 19.0 957.1563561 (11.039747673816453, 11)
loss 364.2075500488281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 365.2627868652344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 366.6526184082031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 364.827880859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 364.57586669921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 362.2572021484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 362.71490478515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 365.2196350097656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 364.4505310058594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 365.2041931152344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 364.2608947753906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 361.9256286621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 363.7007141113281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 363.37054443359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 366.13043212890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 362.5178527832031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 366.2508239746094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 365.0506286621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 362.7522888183594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 361.6516418457031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 362.1417541503906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 362.6491394042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 361.9224853515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 361.20928955078125
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 1 20.0 1379.54110953 (19.385636054792762, 10)
loss 363.6481018066406
Current State,action,reward,Response time,Next State:  (10, 19.385636054792762) 3 19.0 1405.95387237 (19.223969507401588, 11)
loss 361.3739318847656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 362.6731262207031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 367.85968017578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 362.07794189453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 367.49713134765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 364.3172607421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 366.718994140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 363.34698486328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 366.5853271484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 367.4308166503906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 368.50933837890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 359.61004638671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 363.84307861328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 366.61956787109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 365.46112060546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 362.39129638671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 365.3714599609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 364.72161865234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 361.8302001953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 364.8507995605469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 361.497802734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 360.77935791015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 360.9134216308594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 364.77545166015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 367.5317687988281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 363.9272766113281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 363.18896484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 365.1610412597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 362.9945068359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 363.4969787597656
############ Running episode number: 206  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 363.465087890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 363.04656982421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 365.709228515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 365.4541320800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 363.53106689453125
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 0 21.0 967.160346038 (11.25610796929319, 9)
loss 362.522705078125
Current State,action,reward,Response time,Next State:  (9, 11.25610796929319) 4 19.0 1012.3192433 (11.027107764209074, 11)
loss 363.70849609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 362.7845764160156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 363.7149353027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 363.6300048828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 363.6460876464844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 362.3054504394531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 364.5970764160156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 364.42095947265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 366.9966735839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 363.6882019042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 363.8567199707031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 365.70953369140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 361.8682861328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 365.6270446777344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 363.0687561035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 362.8125
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 0 21.0 916.124940439 (10.42733414151318, 9)
loss 362.438720703125
Current State,action,reward,Response time,Next State:  (9, 10.42733414151318) 3 20.0 968.937023414 (10.388469398680568, 10)
loss 360.51397705078125
Current State,action,reward,Response time,Next State:  (10, 10.388469398680568) 3 19.0 928.707336523 (10.344006106602812, 11)
loss 364.49749755859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 366.69744873046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 363.6058349609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 364.5697021484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 365.417236328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 364.00384521484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 363.0455017089844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 363.5508728027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 361.8445739746094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 367.03631591796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 361.5263977050781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 366.3047790527344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 366.6816711425781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 362.8858337402344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 363.4141845703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 365.04559326171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 364.4030456542969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 362.9207458496094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 361.67047119140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 362.6372375488281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 366.09051513671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 367.3013610839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 361.31744384765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 361.2034912109375
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 1 20.0 984.787563682 (11.819721938468785, 10)
loss 363.5032958984375
Current State,action,reward,Response time,Next State:  (10, 11.819721938468785) 3 19.0 1004.62682792 (12.19918626616789, 11)
loss 366.6751708984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 363.46087646484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 367.0016784667969
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 0 21.0 1063.96010023 (13.649658108197247, 9)
loss 362.150390625
Current State,action,reward,Response time,Next State:  (9, 13.649658108197247) 3 20.0 1137.6097809 (14.283719188889453, 10)
loss 365.62908935546875
Current State,action,reward,Response time,Next State:  (10, 14.283719188889453) 3 19.0 1135.32732476 (14.677479537099185, 11)
loss 363.80450439453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 361.89923095703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 365.2116394042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 364.06829833984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 361.111083984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 366.7571105957031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 361.7709655761719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 364.9377136230469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 362.46966552734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 364.1829833984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 363.12152099609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 364.32257080078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 364.05340576171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 365.7279968261719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 365.06005859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 361.90350341796875
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 1 20.0 1392.48057747 (19.223969507401588, 10)
loss 362.46759033203125
Current State,action,reward,Response time,Next State:  (10, 19.223969507401588) 3 19.0 1397.37841716 (19.25591252280865, 11)
loss 363.88201904296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 362.3935241699219
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 1 20.0 1376.52055872 (18.668181536495972, 10)
loss 365.3889465332031
Current State,action,reward,Response time,Next State:  (10, 18.668181536495972) 3 19.0 1367.89714889 (18.375894992990247, 11)
loss 365.73974609375
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 1 20.0 1339.12370397 (17.82724819986867, 10)
loss 362.2435302734375
Current State,action,reward,Response time,Next State:  (10, 17.82724819986867) 3 19.0 1323.29060362 (17.229782241685768, 11)
loss 362.8418884277344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 361.7744445800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 365.0165100097656
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 0 21.0 1226.10492247 (15.950694610794756, 9)
loss 364.2359619140625
Current State,action,reward,Response time,Next State:  (9, 15.950694610794756) 3 20.0 1258.0576862 (15.828704162850809, 10)
loss 361.38018798828125
Current State,action,reward,Response time,Next State:  (10, 15.828704162850809) 3 19.0 1217.27964986 (15.550833128512703, 11)
loss 363.47271728515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 361.7532043457031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 364.0798645019531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 363.20880126953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 362.20648193359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 364.33453369140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 363.4249267578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 364.0808410644531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 365.91265869140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 364.62396240234375
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 4 19.0 1210.80462626 (16.11465619633363, 11)
loss 361.34124755859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 365.8585205078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 364.2550354003906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 364.13250732421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 364.0293273925781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 364.4634094238281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 363.9903259277344
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 2 19.0 1258.27117243 (17.052961248403161, 11)
loss 366.7515869140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 361.71319580078125
############ Running episode number: 207  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 361.905517578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 363.62188720703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 366.0228576660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 365.4296569824219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 364.21807861328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 363.1768798828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 363.572265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 362.0936584472656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 362.8857116699219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 360.9054870605469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 366.23638916015625
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 1 20.0 939.812260006 (10.768325938188134, 10)
loss 365.76715087890625
Current State,action,reward,Response time,Next State:  (10, 10.768325938188134) 3 19.0 948.856481751 (10.772009508959538, 11)
loss 363.32025146484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 361.8721618652344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 363.7752990722656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 363.6401672363281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 361.4164123535156
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 4 19.0 925.789969445 (10.489125480251131, 11)
loss 366.4243469238281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 362.3785095214844
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 1 20.0 920.244245637 (10.433149880183072, 10)
loss 363.2680969238281
Current State,action,reward,Response time,Next State:  (10, 10.433149880183072) 3 19.0 931.077372094 (10.44185150623065, 11)
loss 365.2259826660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 366.6514587402344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 363.972900390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 363.9933166503906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 366.50518798828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 365.60174560546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 364.4320068359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 365.09521484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 363.6766357421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 364.5416564941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 361.71221923828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 365.3135986328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 361.2170104980469
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 4 19.0 911.133954163 (10.236991269871366, 11)
loss 363.4451599121094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 363.6317443847656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 362.0862121582031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 363.62030029296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 362.58966064453125
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 4 19.0 914.152581784 (10.390165524255663, 11)
loss 364.23284912109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 362.3244934082031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 361.8976745605469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 362.66973876953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 366.7134094238281
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 0 21.0 929.522052234 (10.771376986314287, 9)
loss 360.7353820800781
Current State,action,reward,Response time,Next State:  (9, 10.771376986314287) 3 20.0 986.945968488 (10.924797168745895, 10)
loss 362.1108093261719
Current State,action,reward,Response time,Next State:  (10, 10.924797168745895) 3 19.0 957.1563561 (11.039747673816453, 11)
loss 360.11419677734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 363.048583984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 363.38409423828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 364.2416687011719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 362.24298095703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 364.4566345214844
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 0 21.0 1028.70793389 (13.168618569876575, 9)
loss 363.9256591796875
Current State,action,reward,Response time,Next State:  (9, 13.168618569876575) 3 20.0 1112.429735 (13.649658108197247, 10)
loss 364.8686828613281
Current State,action,reward,Response time,Next State:  (10, 13.649658108197247) 3 19.0 1101.69413046 (14.283719188889453, 11)
loss 364.361572265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 362.9896240234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 364.0971374511719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 362.5937194824219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 362.6402893066406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 363.86181640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 361.2586669921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 361.4790344238281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 364.2771911621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 368.0928955078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 365.4403076171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 363.7574768066406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 365.7730712890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 362.37127685546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 365.4772033691406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 363.0065002441406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 364.17254638671875
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 4 19.0 1392.48057747 (19.223969507401588, 11)
loss 362.20703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 365.4326477050781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 364.5498046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 362.5595397949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 362.857177734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 362.57830810546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 367.7893981933594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 361.56207275390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 363.2928161621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 365.42254638671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 360.6484680175781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 361.8592834472656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 361.5892333984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 362.9541015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 362.2304992675781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 362.5188903808594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 363.7817687988281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 362.8757629394531
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 0 21.0 1211.18755114 (16.004586266677634, 9)
loss 363.8520812988281
Current State,action,reward,Response time,Next State:  (9, 16.004586266677634) 3 20.0 1260.87864843 (16.017694914042416, 10)
loss 364.04486083984375
Current State,action,reward,Response time,Next State:  (10, 16.017694914042416) 3 19.0 1227.30449265 (15.947547279389703, 11)
loss 361.60772705078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 361.0075988769531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 363.19049072265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 361.62579345703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 361.4275817871094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 362.1130676269531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 361.0721740722656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 364.3953857421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 360.00335693359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 365.19873046875
############ Running episode number: 208  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 363.258056640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 363.8014831542969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 363.42291259765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 363.0540466308594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 366.1455078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 364.4119873046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 364.1652526855469
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 2 19.0 950.798097136 (10.995673623987257, 11)
loss 363.7707214355469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 361.6883239746094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 367.0794372558594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 363.8167419433594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 363.89361572265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 364.53179931640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 367.6702575683594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 364.9080810546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 361.7008056640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 363.1402893066406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 363.88372802734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 361.7733459472656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 363.1683044433594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 362.4228820800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 363.1354675292969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 364.3692932128906
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 1 20.0 919.104778396 (10.388469398680568, 10)
loss 362.97332763671875
Current State,action,reward,Response time,Next State:  (10, 10.388469398680568) 3 19.0 928.707336523 (10.344006106602812, 11)
loss 364.41253662109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 360.872802734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 363.5694580078125
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 2 19.0 912.494916918 (10.278181486298042, 11)
loss 363.2486877441406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 360.52276611328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 365.1627502441406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 360.0705261230469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 363.02691650390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 361.6544189453125
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 0 21.0 911.133954163 (10.236991269871366, 9)
loss 361.0726013183594
Current State,action,reward,Response time,Next State:  (9, 10.236991269871366) 3 20.0 958.973513426 (10.236272697871373, 10)
loss 364.2381286621094
Current State,action,reward,Response time,Next State:  (10, 10.236272697871373) 3 19.0 920.634200723 (10.369891240151098, 11)
loss 363.4554138183594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 365.0280456542969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 361.1664123535156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 364.09783935546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 361.4339904785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 366.89288330078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 361.36993408203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 362.910888671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 361.7867736816406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 364.5636901855469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 362.48968505859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 363.64788818359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 363.72235107421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 364.4561462402344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 362.6689453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 362.74932861328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 360.0928955078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 361.83428955078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 360.8642883300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 363.37042236328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 364.6321716308594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 364.9162292480469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 365.73583984375
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 1 20.0 1238.24711194 (16.871606159345866, 10)
loss 362.26214599609375
Current State,action,reward,Response time,Next State:  (10, 16.871606159345866) 3 19.0 1272.5994393 (17.534967586021782, 11)
loss 361.89288330078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 361.4692687988281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 362.23162841796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 362.3958740234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 363.0377502441406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 361.9017639160156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 361.8432312011719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 363.13006591796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 364.4513854980469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 364.8447265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 361.1797180175781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 363.248291015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 362.9551696777344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 364.0360107421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 363.0550537109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 361.3298645019531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 363.60491943359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 362.4933166503906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 364.2081298828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 363.35107421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 361.5301513671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 362.7805480957031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 365.3174743652344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 363.3304748535156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 362.33843994140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 362.59246826171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 362.9734802246094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 364.6044921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 359.4874572753906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 362.19580078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 364.5611267089844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 362.16302490234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 364.65966796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 363.8596496582031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 360.71209716796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 361.1396179199219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 362.8549499511719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 362.632080078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 367.16986083984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 363.1970520019531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 364.0006408691406
############ Running episode number: 209  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 360.9444885253906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 363.4649353027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 365.1629333496094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 364.1361083984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 365.0944519042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 361.34393310546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 361.481689453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 361.756103515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 363.0003967285156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 360.4517822265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 365.3544616699219
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 0 21.0 939.812260006 (10.768325938188134, 9)
loss 360.7185363769531
Current State,action,reward,Response time,Next State:  (9, 10.768325938188134) 3 20.0 986.786261176 (10.772009508959538, 10)
loss 362.7969665527344
Current State,action,reward,Response time,Next State:  (10, 10.772009508959538) 3 19.0 949.051873418 (10.644925616761762, 11)
loss 364.63165283203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 363.6850891113281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 362.5791015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 365.4361572265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 364.1602478027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 360.89544677734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 366.5603332519531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 364.4368896484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 362.3272705078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 361.22210693359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 362.4286193847656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 360.1881408691406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 363.5704040527344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 366.2392578125
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 1 20.0 912.494916918 (10.278181486298042, 10)
loss 362.172119140625
Current State,action,reward,Response time,Next State:  (10, 10.278181486298042) 3 19.0 922.857214352 (10.268274366284802, 11)
loss 361.4289245605469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 359.49932861328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 365.22723388671875
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 1 20.0 912.67468196 (10.24826025489064, 10)
loss 363.0374450683594
Current State,action,reward,Response time,Next State:  (10, 10.24826025489064) 3 19.0 921.2700698 (10.276491935146446, 11)
loss 362.4289855957031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 363.02239990234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 364.9832458496094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 363.67694091796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 362.8828125
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 1 20.0 913.272125304 (10.333617326102203, 10)
loss 363.2064514160156
Current State,action,reward,Response time,Next State:  (10, 10.333617326102203) 0 22.0 925.797758139 (10.390165524255663, 8)
loss 364.9534912109375
Current State,action,reward,Response time,Next State:  (8, 10.390165524255663) 3 21.0 978.294574081 (10.425974763084863, 9)
loss 363.0901184082031
Current State,action,reward,Response time,Next State:  (9, 10.425974763084863) 3 20.0 968.865866662 (10.546025383098053, 10)
loss 364.6117858886719
Current State,action,reward,Response time,Next State:  (10, 10.546025383098053) 3 19.0 937.064750655 (10.655373370049301, 11)
loss 363.8044128417969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 363.8409729003906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 362.2880554199219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 360.8435363769531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 362.26568603515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 363.92919921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 363.40533447265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 364.4326477050781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 363.8204345703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 363.68988037109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 363.9764404296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 362.66192626953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 364.62744140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 363.19024658203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 365.20477294921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 361.97784423828125
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 4 19.0 1204.9600972 (16.466876895473597, 11)
loss 362.93975830078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 363.1922607421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 363.05413818359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 361.6105041503906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 363.1861267089844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 362.9761047363281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 364.1611328125
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 4 19.0 1354.73183582 (19.02839494033929, 11)
loss 361.7226867675781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 363.1112976074219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 361.175537109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 363.2948913574219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 367.4554443359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 360.68756103515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 361.4284362792969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 361.11529541015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 365.0400390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 361.7261047363281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 361.07366943359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 363.13250732421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 361.5361328125
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 2 19.0 1278.56065924 (16.84211602880065, 11)
loss 359.90478515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 361.80352783203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 363.2961730957031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 362.87164306640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 365.2684020996094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 362.4430847167969
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 4 19.0 1184.33851965 (15.750501603468638, 11)
loss 363.4778747558594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 365.0033264160156
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 4 19.0 1203.91462651 (15.829956988360925, 11)
loss 361.8612365722656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 364.212646484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 362.71942138671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 362.9786376953125
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 1 20.0 1213.81868812 (16.017694914042416, 10)
loss 365.1308288574219
Current State,action,reward,Response time,Next State:  (10, 16.017694914042416) 3 19.0 1227.30449265 (15.947547279389703, 11)
loss 362.02783203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 365.1507873535156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 364.1473083496094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 365.67572021484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 364.546630859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 361.78619384765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 361.9669494628906
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 1 20.0 1257.77263112 (16.845818065953559, 10)
loss 363.395751953125
Current State,action,reward,Response time,Next State:  (10, 16.845818065953559) 3 19.0 1271.23153331 (17.052961248403161, 11)
loss 362.2095947265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 362.0568542480469
############ Running episode number: 210  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 364.46258544921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 362.494384765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 364.0895690917969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 363.0750732421875
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 4 19.0 974.154538113 (11.336751742492702, 11)
loss 364.25433349609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 361.64080810546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 364.4088439941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 362.9620361328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 361.9751281738281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 367.5919494628906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 363.38702392578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 364.3010559082031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 363.8609619140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 364.33526611328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 363.03082275390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 365.7357177734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 363.82415771484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 363.5047302246094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 365.8327941894531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 364.18115234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 363.2589111328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 362.9783020019531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 365.9460144042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 364.6549377441406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 363.6308288574219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 361.5016784667969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 363.4925231933594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 365.8003234863281
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 0 21.0 911.223233653 (10.268274366284802, 9)
loss 363.71185302734375
Current State,action,reward,Response time,Next State:  (9, 10.268274366284802) 4 19.0 960.611029141 (10.335411397720526, 11)
loss 362.9267272949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 365.2303771972656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 361.4283447265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 363.0323791503906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 364.952392578125
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 1 20.0 909.046654676 (10.236272697871373, 10)
loss 361.9220886230469
Current State,action,reward,Response time,Next State:  (10, 10.236272697871373) 3 19.0 920.634200723 (10.369891240151098, 11)
loss 363.09295654296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 363.58880615234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 363.4316711425781
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 2 19.0 914.152581784 (10.390165524255663, 11)
loss 366.0195007324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 363.04052734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 364.0024108886719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 367.1518249511719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 364.5669250488281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 362.44482421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 363.27008056640625
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 1 20.0 945.391786838 (11.039747673816453, 10)
loss 364.2128601074219
Current State,action,reward,Response time,Next State:  (10, 11.039747673816453) 3 19.0 963.253801267 (11.271571944085663, 11)
loss 362.50726318359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 362.1167907714844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 363.12554931640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 364.45050048828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 363.8633117675781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 364.3064880371094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 362.3385314941406
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 4 19.0 1089.37925646 (14.283719188889453, 11)
loss 362.6763916015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 362.88092041015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 364.2137451171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 362.99493408203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 363.9153137207031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 360.64501953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 364.3837585449219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 362.7193603515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 361.9892272949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 361.7545471191406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 361.81134033203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 364.43914794921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 363.1025695800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 364.8023681640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 363.6181640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 362.2631530761719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 361.8409729003906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 364.1357116699219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 363.9725646972656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 362.7380065917969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 362.16522216796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 365.1911926269531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 364.0575866699219
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 1 20.0 1310.13203606 (17.229782241685768, 10)
loss 365.4073486328125
Current State,action,reward,Response time,Next State:  (10, 17.229782241685768) 3 19.0 1291.59856437 (16.84211602880065, 11)
loss 364.9252624511719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 363.55474853515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 363.3621520996094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 362.0856018066406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 363.0537109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 364.2497253417969
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 0 21.0 1184.33851965 (15.750501603468638, 9)
loss 362.45416259765625
Current State,action,reward,Response time,Next State:  (9, 15.750501603468638) 3 20.0 1247.57857022 (15.817158911312735, 10)
loss 362.5715637207031
Current State,action,reward,Response time,Next State:  (10, 15.817158911312735) 3 19.0 1216.66724247 (15.829956988360925, 11)
loss 365.1053466796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 361.5500793457031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 363.12603759765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 364.56884765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 361.94439697265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 362.6464538574219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 363.01519775390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 362.4277648925781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 363.0534362792969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 361.0960998535156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 363.1009826660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 364.19207763671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 363.4630126953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 362.1006774902344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 365.6228332519531
############ Running episode number: 211  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 363.2259826660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 365.63726806640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 366.2261657714844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 362.4649963378906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 362.003662109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 365.553955078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 362.38812255859375
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 4 19.0 950.798097136 (10.995673623987257, 11)
loss 362.4927062988281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 360.7408752441406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 362.7713928222656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 363.2032470703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 360.88568115234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 366.24237060546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 362.1407470703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 363.1560974121094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 365.0083312988281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 361.8253173828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 361.32501220703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 364.4287414550781
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 0 21.0 920.244245637 (10.433149880183072, 9)
loss 362.6446228027344
Current State,action,reward,Response time,Next State:  (9, 10.433149880183072) 3 20.0 969.241448633 (10.44185150623065, 10)
loss 363.2575988769531
Current State,action,reward,Response time,Next State:  (10, 10.44185150623065) 3 19.0 931.538941947 (10.370942817486826, 11)
loss 370.6465148925781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 362.128173828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 362.8349914550781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 361.3302307128906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 364.88031005859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 363.9458312988281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 362.6202697753906
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 1 20.0 911.223233653 (10.268274366284802, 10)
loss 366.2521667480469
Current State,action,reward,Response time,Next State:  (10, 10.268274366284802) 3 19.0 922.331700166 (10.335411397720526, 11)
loss 364.963623046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 362.5576477050781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 362.8160095214844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 366.58880615234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 362.46856689453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 362.9543151855469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 367.5032958984375
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 1 20.0 916.069372847 (10.316955310454549, 10)
loss 364.5738220214844
Current State,action,reward,Response time,Next State:  (10, 10.316955310454549) 3 19.0 924.913936648 (10.333617326102203, 11)
loss 365.288330078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 363.9822998046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 363.82452392578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 365.16278076171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 363.8508605957031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 366.1615905761719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 366.16229248046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 362.22998046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 363.341552734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 363.7845764160156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 362.34136962890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 365.18560791015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 363.89886474609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 363.1517333984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 363.261474609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 365.0307922363281
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 2 19.0 1089.37925646 (14.283719188889453, 11)
loss 361.8363037109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 364.9127502441406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 366.30523681640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 367.41424560546875
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 2 19.0 1204.9600972 (16.466876895473597, 11)
loss 364.1278076171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 363.0867614746094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 362.3411560058594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 363.8343811035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 362.7335510253906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 362.5880432128906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 363.65643310546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 363.7698669433594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 364.25897216796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 366.0306701660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 363.5775146484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 363.4512939453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 364.3853454589844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 363.3721923828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 360.9241638183594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 362.6127014160156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 365.0137023925781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 362.767578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 370.1332702636719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 365.5910339355469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 364.6415710449219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 364.7825012207031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 360.45135498046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 365.29046630859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 365.8705749511719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 362.211181640625
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 1 20.0 1184.33851965 (15.750501603468638, 10)
loss 361.0703125
Current State,action,reward,Response time,Next State:  (10, 15.750501603468638) 3 19.0 1213.1314661 (15.817158911312735, 11)
loss 362.3288879394531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 360.75201416015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 363.4954528808594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 363.1198425292969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 363.21221923828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 363.9017333984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 362.1136474609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 361.22357177734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 364.914794921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 361.8916320800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 363.0301513671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 362.51971435546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 365.5494384765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 364.14794921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 361.1879577636719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 364.3799133300781
############ Running episode number: 212  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 0 22.0 1029.06659875 (11.973514343585284, 8)
loss 362.20526123046875
Current State,action,reward,Response time,Next State:  (8, 11.973514343585284) 3 21.0 1070.83307124 (11.786394321941378, 9)
loss 364.58197021484375
Current State,action,reward,Response time,Next State:  (9, 11.786394321941378) 3 20.0 1040.0771169 (11.61852219546234, 10)
loss 363.5199890136719
Current State,action,reward,Response time,Next State:  (10, 11.61852219546234) 3 19.0 993.95437024 (11.469111876584304, 11)
loss 364.2892150878906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 363.0320739746094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 363.56036376953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 360.5057067871094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 360.0376892089844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 362.7628479003906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 361.26629638671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 364.31365966796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 364.8093566894531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 359.70867919921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 363.915771484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 362.8258361816406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 361.07562255859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 362.92388916015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 364.2268371582031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 361.79193115234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 360.6690673828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 362.48077392578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 363.0815734863281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 361.5365905761719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 361.4169616699219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 364.44732666015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 364.23529052734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 365.4892578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 363.7830810546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 361.8863525390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 362.7915954589844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 368.5816345214844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 364.33929443359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 362.4272766113281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 365.18316650390625
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 2 19.0 909.046654676 (10.236272697871373, 11)
loss 361.706298828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 362.286376953125
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 4 19.0 916.069372847 (10.316955310454549, 11)
loss 362.5230407714844
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 2 19.0 913.272125304 (10.333617326102203, 11)
loss 361.4000244140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 363.205810546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 361.7574462890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 365.2568054199219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 361.497802734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 363.47808837890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 364.8369140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 361.5007019042969
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 2 19.0 945.391786838 (11.039747673816453, 11)
loss 365.67303466796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 362.5759582519531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 363.2073059082031
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 0 21.0 984.787563682 (11.819721938468785, 9)
loss 364.547607421875
Current State,action,reward,Response time,Next State:  (9, 11.819721938468785) 3 20.0 1041.82165315 (12.19918626616789, 10)
loss 362.87518310546875
Current State,action,reward,Response time,Next State:  (10, 12.19918626616789) 3 19.0 1024.75516863 (12.501496275411796, 11)
loss 363.4815979003906
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 1 20.0 1028.70793389 (13.168618569876575, 10)
loss 362.8232421875
Current State,action,reward,Response time,Next State:  (10, 13.168618569876575) 3 19.0 1076.17782493 (13.649658108197247, 11)
loss 361.87408447265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 362.7582092285156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 366.9686584472656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 362.42578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 361.6600646972656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 364.7625427246094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 366.5691833496094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 361.463134765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 363.6902160644531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 362.6446838378906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 364.4518737792969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 362.83905029296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 362.8219299316406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 363.0129699707031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 363.7021789550781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 364.5265808105469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 362.44891357421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 361.0650939941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 363.1814270019531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 361.6230773925781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 362.9865417480469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 363.5328063964844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 363.898681640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 362.4209289550781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 363.8685302734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 363.9346618652344
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 2 19.0 1258.07554888 (16.237094554670044, 11)
loss 360.6956481933594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 362.94525146484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 364.1677551269531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 362.76275634765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 362.1313171386719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 364.7703552246094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 361.9607849121094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 364.8407897949219
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 0 21.0 1204.59090422 (15.892373986997768, 9)
loss 362.7908630371094
Current State,action,reward,Response time,Next State:  (9, 15.892373986997768) 3 20.0 1255.00488935 (15.954793861767499, 10)
loss 362.9246520996094
Current State,action,reward,Response time,Next State:  (10, 15.954793861767499) 3 19.0 1223.96796344 (16.004586266677634, 11)
loss 362.92974853515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 364.5080871582031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 367.3676452636719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 363.3944396972656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 362.6903076171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 361.77349853515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 362.68548583984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 363.0101623535156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 364.4755859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 360.87957763671875
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 1 20.0 1258.27117243 (17.052961248403161, 10)
loss 364.17523193359375
Current State,action,reward,Response time,Next State:  (10, 17.052961248403161) 3 19.0 1282.21925533 (17.215992726625572, 11)
loss 364.3791198730469
############ Running episode number: 213  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 366.32098388671875
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 0 21.0 1000.80824137 (11.786394321941378, 9)
loss 363.2720642089844
Current State,action,reward,Response time,Next State:  (9, 11.786394321941378) 3 20.0 1040.0771169 (11.61852219546234, 10)
loss 362.2961120605469
Current State,action,reward,Response time,Next State:  (10, 11.61852219546234) 3 19.0 993.95437024 (11.469111876584304, 11)
loss 362.5181884765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 365.9886169433594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 364.80657958984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 363.59014892578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 363.37286376953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 362.6317443847656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 362.0929260253906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 362.14520263671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 363.70074462890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 364.070556640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 360.9225769042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 361.74371337890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 362.4590148925781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 362.25897216796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 363.0508728027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 369.99725341796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 362.0005187988281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 364.7840576171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 361.0741882324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 361.6156921386719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 363.6274108886719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 363.2062683105469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 364.1759948730469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 365.1375427246094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 361.7943420410156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 362.47894287109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 364.5775451660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 364.83154296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 362.5390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 361.920166015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 364.1380310058594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 361.493408203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 364.7722473144531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 362.21649169921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 364.40692138671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 362.1529541015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 360.7872314453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 363.8078918457031
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 0 21.0 925.376677007 (10.655373370049301, 9)
loss 364.48004150390625
Current State,action,reward,Response time,Next State:  (9, 10.655373370049301) 3 20.0 980.873751654 (10.624473674922116, 10)
loss 363.16290283203125
Current State,action,reward,Response time,Next State:  (10, 10.624473674922116) 3 19.0 941.225969064 (10.771376986314287, 11)
loss 363.12127685546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 363.4017028808594
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 0 21.0 945.391786838 (11.039747673816453, 9)
loss 363.9205017089844
Current State,action,reward,Response time,Next State:  (9, 11.039747673816453) 3 20.0 1000.99384957 (11.271571944085663, 10)
loss 362.6783752441406
Current State,action,reward,Response time,Next State:  (10, 11.271571944085663) 3 19.0 975.550709187 (11.670334358779868, 11)
loss 362.9564514160156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 364.7423400878906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 360.37384033203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 365.8087158203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 363.7951354980469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 361.27862548828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 361.9846496582031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 362.9552917480469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 361.97064208984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 362.9349670410156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 362.4884338378906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 363.5705261230469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 360.978515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 362.54229736328125
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 4 19.0 1301.78496219 (17.944480812078613, 11)
loss 363.0581970214844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 362.1576843261719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 363.05230712890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 361.1519775390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 364.3515319824219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 368.64593505859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 363.97265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 361.5783996582031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 366.1103515625
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 1 20.0 1392.48057747 (19.223969507401588, 10)
loss 363.9140625
Current State,action,reward,Response time,Next State:  (10, 19.223969507401588) 3 19.0 1397.37841716 (19.25591252280865, 11)
loss 364.51025390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 362.6184387207031
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 1 20.0 1376.52055872 (18.668181536495972, 10)
loss 361.92626953125
Current State,action,reward,Response time,Next State:  (10, 18.668181536495972) 0 22.0 1367.89714889 (18.375894992990247, 8)
loss 363.9143371582031
Current State,action,reward,Response time,Next State:  (8, 18.375894992990247) 3 21.0 1445.01889581 (17.82724819986867, 9)
loss 365.3094482421875
Current State,action,reward,Response time,Next State:  (9, 17.82724819986867) 3 20.0 1356.28600579 (17.229782241685768, 10)
loss 362.6175537109375
Current State,action,reward,Response time,Next State:  (10, 17.229782241685768) 2 20.0 1291.59856437 (16.84211602880065, 10)
loss 365.53460693359375
Current State,action,reward,Response time,Next State:  (10, 16.84211602880065) 3 19.0 1271.03516211 (16.237094554670044, 11)
loss 363.9951477050781
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 1 20.0 1226.10492247 (15.950694610794756, 10)
loss 362.9080810546875
Current State,action,reward,Response time,Next State:  (10, 15.950694610794756) 3 19.0 1223.7505224 (15.828704162850809, 11)
loss 362.7977294921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 366.5209045410156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 365.2852478027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 361.7970275878906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 362.4103698730469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 364.8245849609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 365.4439697265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 367.3804016113281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 361.36846923828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 361.5613098144531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 366.38970947265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 364.0420227050781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 361.7171325683594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 362.6432800292969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 361.7412414550781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 365.5093078613281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 362.24639892578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 363.7334289550781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 363.57733154296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 361.8426208496094
############ Running episode number: 214  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 362.4323425292969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 365.0951843261719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 364.6111145019531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 365.1695861816406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 367.3236389160156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 361.55291748046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 362.5465393066406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 361.2814636230469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 365.96435546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 363.0904541015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 361.98046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 362.7186279296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 362.6379089355469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 362.9046630859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 361.6806945800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 362.2059631347656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 363.8697509765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 362.532470703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 365.8861083984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 362.3559875488281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 361.0403747558594
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 0 21.0 919.871906942 (10.370942817486826, 9)
loss 363.7726135253906
Current State,action,reward,Response time,Next State:  (9, 10.370942817486826) 1 22.0 965.985215893 (10.42733414151318, 8)
loss 361.7062683105469
Current State,action,reward,Response time,Next State:  (8, 10.42733414151318) 3 21.0 980.466886297 (10.388469398680568, 9)
loss 363.3380126953125
Current State,action,reward,Response time,Next State:  (9, 10.388469398680568) 3 20.0 966.902645924 (10.344006106602812, 10)
loss 366.1376037597656
Current State,action,reward,Response time,Next State:  (10, 10.344006106602812) 3 19.0 926.348821567 (10.319026962956018, 11)
loss 362.0418701171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 362.5806579589844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 362.7183837890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 362.31304931640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 361.7668151855469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 362.04339599609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 360.99365234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 363.32080078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 363.20428466796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 367.6779479980469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 364.73681640625
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 2 19.0 916.069372847 (10.316955310454549, 11)
loss 362.7464599609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 362.45611572265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 367.72900390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 363.7259521484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 362.2202453613281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 364.9366455078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 360.9756164550781
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 2 19.0 929.522052234 (10.771376986314287, 11)
loss 363.7226867675781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 363.5340576171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 359.9915771484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 362.3668212890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 363.4455261230469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 365.7253112792969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 361.0213928222656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 362.30755615234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 360.9930725097656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 361.97735595703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 364.4215087890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 360.9815673828125
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 1 20.0 1143.69153516 (15.353965082180355, 10)
loss 362.95538330078125
Action +2 not possible so Scaled up by 1
Current State,action,reward,Response time,Next State:  (10, 15.353965082180355) 4 19.0 1192.09754638 (15.836943704090487, 11)
loss 363.5024108886719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 361.7954406738281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 365.4176330566406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 362.9639892578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 362.42578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 365.665283203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 363.4317932128906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 361.3388977050781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 363.79718017578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 364.99365234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 362.4221496582031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 363.303955078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 363.4990234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 364.9036865234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 363.7796325683594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 365.1885986328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 359.1678466796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 367.4695129394531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 362.9537048339844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 363.1414489746094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 361.7101745605469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 365.25250244140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 366.01251220703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 363.7221984863281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 362.4029235839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 363.85015869140625
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 0 21.0 1189.84140354 (15.446694946204717, 9)
loss 363.63970947265625
Current State,action,reward,Response time,Next State:  (9, 15.446694946204717) 3 20.0 1231.67579099 (15.750501603468638, 10)
loss 363.2165222167969
Current State,action,reward,Response time,Next State:  (10, 15.750501603468638) 3 19.0 1213.1314661 (15.817158911312735, 11)
loss 361.7333068847656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 362.49407958984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 361.89031982421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 363.84271240234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 365.0265808105469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 365.34783935546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 363.3153991699219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 365.69927978515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 363.7763977050781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 364.8467712402344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 365.03857421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 363.98870849609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 361.0635986328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 365.17108154296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 362.36175537109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 363.0547180175781
############ Running episode number: 215  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 364.98651123046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 363.4378356933594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 363.1690979003906
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 2 19.0 982.049698353 (11.469111876584304, 11)
loss 361.821044921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 364.6779479980469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 360.9269104003906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 360.08489990234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 364.5623779296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 361.4383544921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 359.1514587402344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 359.32568359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 362.34716796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 365.4635925292969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 361.4308776855469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 363.2020263671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 363.2279968261719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 362.8556213378906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 362.50115966796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 362.5061340332031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 359.9610595703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 365.7501220703125
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 0 21.0 919.871906942 (10.370942817486826, 9)
loss 362.5134582519531
Current State,action,reward,Response time,Next State:  (9, 10.370942817486826) 3 20.0 965.985215893 (10.42733414151318, 10)
loss 362.30645751953125
Current State,action,reward,Response time,Next State:  (10, 10.42733414151318) 3 19.0 930.768881517 (10.388469398680568, 11)
loss 366.978271484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 360.8935852050781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 360.3628845214844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 363.53521728515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 363.3218078613281
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 1 20.0 911.223233653 (10.268274366284802, 10)
loss 363.8743591308594
Current State,action,reward,Response time,Next State:  (10, 10.268274366284802) 3 19.0 922.331700166 (10.335411397720526, 11)
loss 360.90411376953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 366.25054931640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 362.0474548339844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 368.2273254394531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 365.9788513183594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 362.54949951171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 360.9815979003906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 365.2557373046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 359.3135070800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 360.6676330566406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 364.36639404296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 364.6482849121094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 362.3818664550781
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 0 21.0 931.154858096 (10.624473674922116, 9)
loss 364.43524169921875
Current State,action,reward,Response time,Next State:  (9, 10.624473674922116) 3 20.0 979.256305105 (10.771376986314287, 10)
loss 363.7108154296875
Current State,action,reward,Response time,Next State:  (10, 10.771376986314287) 3 19.0 949.018321829 (10.924797168745895, 11)
loss 360.3762512207031
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 2 19.0 945.391786838 (11.039747673816453, 11)
loss 362.62762451171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 361.79315185546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 363.214599609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 365.5733947753906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 361.3167724609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 362.8781433105469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 367.088623046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 365.4459533691406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 361.24957275390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 362.22882080078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 361.26104736328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 362.32000732421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 365.5872497558594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 362.068115234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 361.3085021972656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 366.1294250488281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 362.7270812988281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 363.7486267089844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 360.0807800292969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 362.8425598144531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 364.117919921875
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 0 21.0 1387.23260634 (19.340464848017284, 9)
loss 362.39019775390625
Current State,action,reward,Response time,Next State:  (9, 19.340464848017284) 3 20.0 1435.4954296 (19.213467265587269, 10)
loss 361.7646179199219
Action +2 not possible so Scaled up by 1
Current State,action,reward,Response time,Next State:  (10, 19.213467265587269) 4 19.0 1396.82133527 (19.140765783401285, 11)
loss 360.82110595703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 364.945068359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 362.8228454589844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 363.31494140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 362.90362548828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 362.3549499511719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 361.149169921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 361.0528259277344
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 4 19.0 1310.13203606 (17.229782241685768, 11)
loss 364.0669250488281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 362.4219055175781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 362.1989440917969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 362.9591064453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 364.4007873535156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 362.52117919921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 365.0455017089844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 362.1823425292969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 361.7962341308594
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 0 21.0 1203.91462651 (15.829956988360925, 9)
loss 360.3220520019531
Current State,action,reward,Response time,Next State:  (9, 15.829956988360925) 3 20.0 1251.7376675 (15.892373986997768, 10)
loss 364.0943603515625
Current State,action,reward,Response time,Next State:  (10, 15.892373986997768) 3 19.0 1220.65695786 (15.954793861767499, 11)
loss 363.5518493652344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 364.15789794921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 363.1501770019531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 361.7738342285156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 363.8240966796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 366.1202087402344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 361.71917724609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 363.3370666503906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 363.2861633300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 365.2889099121094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 366.8282165527344
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 0 21.0 1258.27117243 (17.052961248403161, 9)
loss 361.9539794921875
Current State,action,reward,Response time,Next State:  (9, 17.052961248403161) 3 20.0 1315.75590499 (17.215992726625572, 10)
loss 360.817138671875
############ Running episode number: 216  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 362.7743835449219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 361.466552734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 364.3533020019531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 364.39385986328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 362.7422180175781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 362.0894470214844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 361.9144287109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 361.3573913574219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 363.15753173828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 365.68505859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 362.9011535644531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 363.40216064453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 362.8823547363281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 363.3567810058594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 360.59759521484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 362.3001403808594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 364.2278747558594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 365.78533935546875
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 4 19.0 922.369964659 (10.448897752470936, 11)
loss 364.9775085449219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 362.2364196777344
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 4 19.0 919.412094444 (10.44185150623065, 11)
loss 365.84490966796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 360.6864318847656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 364.99200439453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 360.0512390136719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 362.1720886230469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 362.8797912597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 366.4750061035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 362.2436828613281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 361.2680969238281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 361.2439880371094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 361.1722412109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 363.8670349121094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 362.8945617675781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 361.6493835449219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 361.99786376953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 364.6762390136719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 361.2076416015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 364.0984191894531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 363.91424560546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 366.5837707519531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 363.741943359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 363.0010986328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 362.83538818359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 363.1373291015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 362.1592712402344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 363.1620788574219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 364.6944580078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 363.6869201660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 363.899658203125
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 1 20.0 992.681522335 (12.19918626616789, 10)
loss 365.47686767578125
Current State,action,reward,Response time,Next State:  (10, 12.19918626616789) 3 19.0 1024.75516863 (12.501496275411796, 11)
loss 362.972900390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 363.6063232421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 364.16912841796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 359.71685791015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 364.2315673828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 362.7384338378906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 360.6963806152344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 361.8070983886719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 362.35113525390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 363.8580627441406
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 2 19.0 1294.6873044 (17.669285735563751, 11)
loss 363.1413879394531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 363.7991943359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 361.3980407714844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 363.98583984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 362.5627746582031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 363.0702209472656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 364.8235778808594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 363.79998779296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 364.4278259277344
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 2 19.0 1379.54110953 (19.385636054792762, 11)
loss 364.9371032714844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 360.90655517578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 361.2455749511719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 364.3648376464844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 363.8760681152344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 364.20574951171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 361.9732666015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 363.7919921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 362.7642517089844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 366.3431396484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 363.3009948730469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 361.628173828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 361.804931640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 364.64422607421875
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 2 19.0 1184.33851965 (15.750501603468638, 11)
loss 360.9944152832031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 362.9049987792969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 364.48553466796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 362.2030944824219
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 1 20.0 1207.88915169 (15.954793861767499, 10)
loss 361.9421691894531
Current State,action,reward,Response time,Next State:  (10, 15.954793861767499) 3 19.0 1223.96796344 (16.004586266677634, 11)
loss 361.8930358886719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 364.4593811035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 361.13067626953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 362.2848205566406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 362.36419677734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 365.5780029296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 363.21405029296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 362.2885437011719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 362.5811462402344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 361.77325439453125
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 2 19.0 1258.27117243 (17.052961248403161, 11)
loss 363.64886474609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 363.0073547363281
############ Running episode number: 217  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 366.55108642578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 362.0709228515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 363.5987548828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 361.5865783691406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 359.9027099609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 363.36151123046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 363.05511474609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 365.8246154785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 362.8194580078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 362.72198486328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 361.20623779296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 361.18115234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 362.50616455078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 362.7458801269531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 365.4556884765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 362.6419372558594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 362.19781494140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 361.37481689453125
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 1 20.0 922.369964659 (10.448897752470936, 10)
loss 363.0267333984375
Current State,action,reward,Response time,Next State:  (10, 10.448897752470936) 3 19.0 931.912703681 (10.433149880183072, 11)
loss 363.02197265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 361.685791015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 361.8801574707031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 368.86346435546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 362.4649353027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 360.25030517578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 360.15924072265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 366.42449951171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 365.2456970214844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 362.562255859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 361.997314453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 361.0945129394531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 362.1402587890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 360.66363525390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 362.9473571777344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 363.2388610839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 362.27197265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 363.51556396484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 360.78192138671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 363.3576965332031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 361.1888427734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 359.90087890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 360.3724670410156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 365.0900573730469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 363.2349853515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 362.4099426269531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 360.4562683105469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 361.636962890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 362.9906311035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 363.0473937988281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 364.23577880859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 362.4436340332031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 364.32855224609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 361.9557189941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 361.496826171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 362.5690612792969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 362.2483825683594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 363.6432800292969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 365.2462463378906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 363.2154846191406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 360.9871520996094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 361.925048828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 362.2813720703125
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 0 21.0 1316.32685758 (18.385807405229915, 9)
loss 363.0003356933594
Current State,action,reward,Response time,Next State:  (9, 18.385807405229915) 3 20.0 1385.5238237 (18.671267839956315, 10)
loss 362.1112365722656
Current State,action,reward,Response time,Next State:  (10, 18.671267839956315) 3 19.0 1368.06085906 (19.02839494033929, 11)
loss 363.5585021972656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 361.8609924316406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 362.8760070800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 363.11199951171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 363.1028137207031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 364.0022277832031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 363.97515869140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 363.663330078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 364.84765625
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 2 19.0 1376.52055872 (18.668181536495972, 11)
loss 360.1630859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 364.08367919921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 365.2794494628906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 360.9662170410156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 359.7841796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 361.6426696777344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 362.5710144042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 360.7276306152344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 361.9064636230469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 361.3744812011719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 363.67816162109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 362.0625915527344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 361.46209716796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 358.891357421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 363.6451416015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 366.257568359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 362.89947509765625
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 1 20.0 1214.51137704 (15.947547279389703, 10)
loss 361.0894775390625
Current State,action,reward,Response time,Next State:  (10, 15.947547279389703) 2 20.0 1223.58357506 (16.11465619633363, 10)
loss 364.8688659667969
Current State,action,reward,Response time,Next State:  (10, 16.11465619633363) 3 19.0 1232.44771583 (16.147078378791146, 11)
loss 362.1183776855469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 361.39141845703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 363.9848937988281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 361.5237731933594
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 1 20.0 1248.87152463 (16.836383524612351, 10)
loss 364.1894836425781
Current State,action,reward,Response time,Next State:  (10, 16.836383524612351) 3 19.0 1270.73108663 (16.845818065953559, 11)
loss 362.20184326171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 362.83154296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 364.2410888671875
############ Running episode number: 218  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 361.83331298828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 361.8236389160156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 366.3108215332031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 360.3857116699219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 361.684326171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 360.9943542480469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 361.03082275390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 363.5215148925781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 362.46771240234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 364.2416687011719
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 2 19.0 939.691239608 (10.819208572963639, 11)
loss 364.5384216308594
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 2 19.0 939.812260006 (10.768325938188134, 11)
loss 365.41851806640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 362.47064208984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 362.34246826171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 362.77081298828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 361.2464599609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 361.8993225097656
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 1 20.0 925.789969445 (10.489125480251131, 10)
loss 362.553955078125
Current State,action,reward,Response time,Next State:  (10, 10.489125480251131) 3 19.0 934.046546974 (10.448897752470936, 11)
loss 362.4306945800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 362.42047119140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 361.8307800292969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 362.3248596191406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 360.34527587890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 364.70220947265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 361.1515197753906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 360.2057189941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 360.5467224121094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 362.69354248046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 361.17840576171875
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 0 21.0 910.69972028 (10.335411397720526, 9)
loss 361.7231140136719
Current State,action,reward,Response time,Next State:  (9, 10.335411397720526) 3 20.0 964.125321415 (10.305649118067803, 10)
loss 360.82501220703125
Current State,action,reward,Response time,Next State:  (10, 10.305649118067803) 3 19.0 924.314209939 (10.24826025489064, 11)
loss 364.75653076171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 363.5267333984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 363.0751953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 361.9405822753906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 364.4692077636719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 366.0086364746094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 365.0754089355469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 360.2107849121094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 361.9212951660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 362.41961669921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 364.15716552734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 363.12347412109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 360.2771911621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 362.6410217285156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 360.6203918457031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 363.0344543457031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 360.8726806640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 360.4195556640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 359.64306640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 362.84759521484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 364.6122131347656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 362.4859619140625
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 4 19.0 1089.37925646 (14.283719188889453, 11)
loss 360.8460388183594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 364.108642578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 363.828857421875
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 1 20.0 1179.43847566 (15.836943704090487, 10)
loss 363.8637390136719
Current State,action,reward,Response time,Next State:  (10, 15.836943704090487) 3 19.0 1217.71670884 (16.466876895473597, 11)
loss 362.0496826171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 363.7276916503906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 361.9609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 361.2236022949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 363.35137939453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 360.26055908203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 362.2217102050781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 365.57635498046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 366.2840576171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 363.39129638671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 364.82562255859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 361.007080078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 361.001953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 365.6195983886719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 361.4189147949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 362.31011962890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 363.3564758300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 364.4112548828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 361.4867858886719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 364.0504455566406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 361.4887390136719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 365.7015075683594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 362.0896911621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 361.1850280761719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 366.38739013671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 362.3620300292969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 362.94500732421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 360.49755859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 361.7677001953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 361.8442687988281
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 2 19.0 1207.88915169 (15.954793861767499, 11)
loss 364.3548889160156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 362.4288635253906
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 4 19.0 1213.81868812 (16.017694914042416, 11)
loss 360.99908447265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 359.9167785644531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 359.3601379394531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 361.36895751953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 365.4833679199219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 360.6900329589844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 361.7113037109375
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 2 19.0 1248.87152463 (16.836383524612351, 11)
loss 365.1022644042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 364.8511962890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 362.35894775390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 360.5010070800781
############ Running episode number: 219  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 363.29058837890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 360.3271179199219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 362.32769775390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 363.5399169921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 363.6290283203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 359.2928466796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 362.5455627441406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 365.4098815917969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 361.0029296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 361.34027099609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 364.5060119628906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 364.3033447265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 361.62939453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 362.291015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 362.3859558105469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 362.80816650390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 361.61138916015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 361.92889404296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 362.6700439453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 362.153076171875
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 4 19.0 919.412094444 (10.44185150623065, 11)
loss 362.91204833984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 360.3442687988281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 363.10577392578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 364.5268859863281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 363.4380187988281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 361.78814697265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 363.6084899902344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 361.375244140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 364.9502868652344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 361.7554626464844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 363.7009582519531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 363.4237976074219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 362.4333190917969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 361.56536865234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 362.4053649902344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 362.1683654785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 361.1373596191406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 363.8661193847656
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 1 20.0 914.152581784 (10.390165524255663, 10)
loss 361.794677734375
Current State,action,reward,Response time,Next State:  (10, 10.390165524255663) 3 19.0 928.797305964 (10.425974763084863, 11)
loss 366.77008056640625
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 4 19.0 919.032945938 (10.546025383098053, 11)
loss 360.277587890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 362.375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 361.4574890136719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 362.4651184082031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 363.0783996582031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 363.44073486328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 359.5889892578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 361.9211120605469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 361.94073486328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 361.49066162109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 364.2999267578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 361.98944091796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 363.4015808105469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 365.0534973144531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 362.7490234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 362.06488037109375
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 1 20.0 1179.43847566 (15.836943704090487, 10)
loss 362.5857238769531
Current State,action,reward,Response time,Next State:  (10, 15.836943704090487) 3 19.0 1217.71670884 (16.466876895473597, 11)
loss 360.91253662109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 363.4974670410156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 360.4095764160156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 361.81298828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 362.1018371582031
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 1 20.0 1316.32685758 (18.385807405229915, 10)
loss 361.1015625
Current State,action,reward,Response time,Next State:  (10, 18.385807405229915) 3 19.0 1352.9188695 (18.671267839956315, 11)
loss 364.2084045410156
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 2 19.0 1354.73183582 (19.02839494033929, 11)
loss 361.4829406738281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 360.3452453613281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 363.05706787109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 363.31201171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 362.94158935546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 360.00872802734375
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 4 19.0 1392.48057747 (19.223969507401588, 11)
loss 362.71209716796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 360.0130615234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 359.0598449707031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 362.3349914550781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 361.0025634765625
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 1 20.0 1339.12370397 (17.82724819986867, 10)
loss 363.007080078125
Current State,action,reward,Response time,Next State:  (10, 17.82724819986867) 3 19.0 1323.29060362 (17.229782241685768, 11)
loss 361.40692138671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 360.8331604003906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 360.4880676269531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 363.8298645019531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 366.2233581542969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 361.8847351074219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 362.4106140136719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 364.2904968261719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 362.3701171875
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 4 19.0 1203.91462651 (15.829956988360925, 11)
loss 362.60809326171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 365.7269592285156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 362.74383544921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 362.0562438964844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 361.8124084472656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 364.78399658203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 361.0354919433594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 361.763916015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 361.051025390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 361.73138427734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 361.8992919921875
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 2 19.0 1248.87152463 (16.836383524612351, 11)
loss 359.39739990234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 362.47528076171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 361.9708557128906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 364.6966857910156
############ Running episode number: 220  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 360.7995300292969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 362.6786804199219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 363.2943115234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 361.6702575683594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 360.78814697265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 362.05609130859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 360.9167175292969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 360.4117431640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 365.4800109863281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 362.78814697265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 362.4046630859375
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 1 20.0 939.812260006 (10.768325938188134, 10)
loss 361.7352294921875
Current State,action,reward,Response time,Next State:  (10, 10.768325938188134) 3 19.0 948.856481751 (10.772009508959538, 11)
loss 364.5062255859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 359.4549255371094
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 1 20.0 930.602776506 (10.58735855349979, 10)
loss 363.4313659667969
Current State,action,reward,Response time,Next State:  (10, 10.58735855349979) 3 19.0 939.257231149 (10.552868829802469, 11)
loss 361.82574462890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 360.3143310546875
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 1 20.0 925.789969445 (10.489125480251131, 10)
loss 361.888427734375
Current State,action,reward,Response time,Next State:  (10, 10.489125480251131) 2 20.0 934.046546974 (10.448897752470936, 10)
loss 360.4928283691406
Current State,action,reward,Response time,Next State:  (10, 10.448897752470936) 3 19.0 931.912703681 (10.433149880183072, 11)
loss 363.2408142089844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 361.36492919921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 359.6165466308594
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 2 19.0 916.124940439 (10.42733414151318, 11)
loss 362.0937194824219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 362.4737548828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 360.94256591796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 363.6405334472656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 363.4115905761719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 363.43743896484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 360.72698974609375
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 1 20.0 910.69972028 (10.335411397720526, 10)
loss 362.41680908203125
Current State,action,reward,Response time,Next State:  (10, 10.335411397720526) 2 20.0 925.892923039 (10.305649118067803, 10)
loss 360.9779968261719
Current State,action,reward,Response time,Next State:  (10, 10.305649118067803) 1 21.0 924.314209939 (10.24826025489064, 9)
loss 361.40875244140625
Current State,action,reward,Response time,Next State:  (9, 10.24826025489064) 3 20.0 959.563389179 (10.276491935146446, 10)
loss 362.5562744140625
Current State,action,reward,Response time,Next State:  (10, 10.276491935146446) 3 19.0 922.767593645 (10.236991269871366, 11)
loss 363.8970642089844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 359.7977294921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 361.0068664550781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 361.9818115234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 363.8663330078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 364.3796691894531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 361.01068115234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 362.3182678222656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 362.40118408203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 361.2362060546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 362.6209716796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 361.875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 361.65203857421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 362.6445617675781
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 1 20.0 963.716106332 (11.670334358779868, 10)
loss 363.91595458984375
Current State,action,reward,Response time,Next State:  (10, 11.670334358779868) 3 19.0 996.702699398 (11.819721938468785, 11)
loss 361.05072021484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 363.2679138183594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 362.560791015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 362.5768127441406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 362.86651611328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 362.698486328125
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 4 19.0 1122.88439768 (14.677479537099185, 11)
loss 359.93292236328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 362.4734802246094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 360.97442626953125
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 0 21.0 1204.9600972 (16.466876895473597, 9)
loss 359.8759460449219
Current State,action,reward,Response time,Next State:  (9, 16.466876895473597) 3 20.0 1285.07728144 (16.871606159345866, 10)
loss 362.5745544433594
Current State,action,reward,Response time,Next State:  (10, 16.871606159345866) 3 19.0 1272.5994393 (17.534967586021782, 11)
loss 363.09429931640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 360.451171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 361.0867004394531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 362.6009216308594
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 4 19.0 1339.64749699 (18.671267839956315, 11)
loss 365.6201171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 360.1891784667969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 363.0594787597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 363.1058654785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 363.24908447265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 361.9923400878906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 361.6869812011719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 363.416015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 366.1231384277344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 362.5738830566406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 361.6745300292969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 361.6445007324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 364.5545349121094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 366.1686706542969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 361.4642639160156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 361.8964538574219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 362.0203857421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 362.1280212402344
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 0 21.0 1204.52470225 (15.550833128512703, 9)
loss 361.872802734375
Current State,action,reward,Response time,Next State:  (9, 15.550833128512703) 3 20.0 1237.12691092 (15.446694946204717, 10)
loss 360.2176208496094
Current State,action,reward,Response time,Next State:  (10, 15.446694946204717) 3 19.0 1197.01631782 (15.750501603468638, 11)
loss 360.85223388671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 362.2952880859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 363.04083251953125
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 4 19.0 1204.59090422 (15.892373986997768, 11)
loss 362.6943054199219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 361.1081848144531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 363.7751159667969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 361.455078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 363.6011047363281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 364.4378967285156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 363.0413513183594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 360.1246032714844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 364.0144958496094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 362.70867919921875
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 4 19.0 1248.87152463 (16.836383524612351, 11)
loss 362.6598205566406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 360.1868896484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 359.78375244140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 360.98382568359375
############ Running episode number: 221  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 361.45574951171875
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 2 19.0 1000.80824137 (11.786394321941378, 11)
loss 362.0691223144531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 363.1412048339844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 362.46319580078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 360.8476867675781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 362.17877197265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 364.9566955566406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 361.1573791503906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 363.2463073730469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 361.6035461425781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 364.5180969238281
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 0 21.0 939.812260006 (10.768325938188134, 9)
loss 361.3702392578125
Current State,action,reward,Response time,Next State:  (9, 10.768325938188134) 3 20.0 986.786261176 (10.772009508959538, 10)
loss 361.8446960449219
Current State,action,reward,Response time,Next State:  (10, 10.772009508959538) 3 19.0 949.051873418 (10.644925616761762, 11)
loss 361.808349609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 360.58612060546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 362.240234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 362.37884521484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 363.5467529296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 363.0423278808594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 359.4966125488281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 363.3370056152344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 363.951904296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 360.85150146484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 362.7849426269531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 363.4323425292969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 361.2442626953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 362.0068664550781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 363.5151062011719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 361.4906311035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 364.0079040527344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 364.85064697265625
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 1 20.0 912.67468196 (10.24826025489064, 10)
loss 363.8724670410156
Current State,action,reward,Response time,Next State:  (10, 10.24826025489064) 3 19.0 921.2700698 (10.276491935146446, 11)
loss 362.72100830078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 364.48712158203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 364.68218994140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 360.5078430175781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 363.2615051269531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 362.35797119140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 361.7763671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 361.2496032714844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 360.45794677734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 363.58807373046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 364.0791931152344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 363.2892761230469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 361.05584716796875
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 1 20.0 945.391786838 (11.039747673816453, 10)
loss 363.1081237792969
Current State,action,reward,Response time,Next State:  (10, 11.039747673816453) 3 19.0 963.253801267 (11.271571944085663, 11)
loss 361.8116455078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 361.8879699707031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 362.8457336425781
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 2 19.0 992.681522335 (12.19918626616789, 11)
loss 362.0626220703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 360.6888122558594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 365.2110900878906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 364.81292724609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 359.5484619140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 361.12908935546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 364.409912109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 361.4671936035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 364.0643615722656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 360.18316650390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 362.3023376464844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 362.2649841308594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 363.0167236328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 366.25140380859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 361.34783935546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 365.4419860839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 362.0226745605469
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 2 19.0 1387.23260634 (19.340464848017284, 11)
loss 361.4486083984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 363.12445068359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 366.66839599609375
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 0 21.0 1379.54110953 (19.385636054792762, 9)
loss 365.0582580566406
Current State,action,reward,Response time,Next State:  (9, 19.385636054792762) 3 20.0 1437.85991935 (19.223969507401588, 10)
loss 361.11224365234375
Current State,action,reward,Response time,Next State:  (10, 19.223969507401588) 3 19.0 1397.37841716 (19.25591252280865, 11)
loss 364.0800476074219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 361.8512268066406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 361.4775085449219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 361.8294982910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 364.2621154785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 362.0295104980469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 362.7463684082031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 362.6031494140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 363.2496643066406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 361.51568603515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 361.77935791015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 363.8450927734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 360.39892578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 363.83050537109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 361.3414001464844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 361.4889221191406
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 0 21.0 1207.88915169 (15.954793861767499, 9)
loss 361.6374816894531
Current State,action,reward,Response time,Next State:  (9, 15.954793861767499) 3 20.0 1258.27226176 (16.004586266677634, 10)
loss 362.61474609375
Current State,action,reward,Response time,Next State:  (10, 16.004586266677634) 3 19.0 1226.60915635 (16.017694914042416, 11)
loss 364.0281982421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 361.7691345214844
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 4 19.0 1210.80462626 (16.11465619633363, 11)
loss 360.9694519042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 365.53741455078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 362.3188171386719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 362.6380920410156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 362.12579345703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 360.9893798828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 362.3098449707031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 364.9112854003906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 362.87432861328125
############ Running episode number: 222  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 363.4132385253906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 362.37554931640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 362.90350341796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 362.01263427734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 364.419677734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 363.85113525390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 362.13665771484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 362.8130798339844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 362.1576843261719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 365.2021789550781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 363.4965515136719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 364.52349853515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 362.8847961425781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 361.794677734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 362.6588134765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 361.7132263183594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 362.3442077636719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 360.212890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 363.3688049316406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 361.68621826171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 361.5092468261719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 363.8725280761719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 361.95770263671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 363.3077392578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 361.9334411621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 364.7652893066406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 361.4018249511719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 362.3897705078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 361.9471740722656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 364.3202209472656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 362.9167785644531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 365.3414306640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 363.8490295410156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 361.35430908203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 362.0733337402344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 363.7721862792969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 359.742919921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 361.7254333496094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 360.31439208984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 363.2059631347656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 361.5423583984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 361.5752258300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 362.91607666015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 361.9180603027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 362.7889099121094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 363.8247375488281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 362.8394775390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 365.19329833984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 364.76123046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 362.45166015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 361.4256896972656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 362.89935302734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 362.17987060546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 363.3399658203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 364.1796569824219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 361.0856628417969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 361.7898254394531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 363.39501953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 360.74957275390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 361.4772644042969
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 2 19.0 1294.6873044 (17.669285735563751, 11)
loss 363.7884521484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 362.8327941894531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 363.9013366699219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 360.94305419921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 367.1413879394531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 361.34234619140625
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 1 20.0 1387.23260634 (19.340464848017284, 10)
loss 363.22943115234375
Current State,action,reward,Response time,Next State:  (10, 19.340464848017284) 3 19.0 1403.55780672 (19.213467265587269, 11)
loss 365.51641845703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 361.42919921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 361.2076721191406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 361.3333740234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 363.9455261230469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 362.7070007324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 361.4307556152344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 360.5042724609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 362.64898681640625
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 4 19.0 1310.13203606 (17.229782241685768, 11)
loss 359.5782775878906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 363.2071228027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 360.4496765136719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 364.0210266113281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 363.3443908691406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 359.883544921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 362.8973693847656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 362.9371643066406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 362.64935302734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 362.5196228027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 362.0156555175781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 363.8087463378906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 362.1835021972656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 360.5564270019531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 363.2162780761719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 360.2734680175781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 360.9666442871094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 360.2225341796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 363.4065246582031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 360.1325988769531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 362.4671325683594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 366.1902160644531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 362.79193115234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 362.88897705078125
############ Running episode number: 223  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 362.79791259765625
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 0 21.0 1000.80824137 (11.786394321941378, 9)
loss 362.732177734375
Current State,action,reward,Response time,Next State:  (9, 11.786394321941378) 3 20.0 1040.0771169 (11.61852219546234, 10)
loss 361.6517028808594
Current State,action,reward,Response time,Next State:  (10, 11.61852219546234) 2 20.0 993.95437024 (11.469111876584304, 10)
loss 362.967529296875
Current State,action,reward,Response time,Next State:  (10, 11.469111876584304) 3 19.0 986.02903554 (11.336751742492702, 11)
loss 363.9323425292969
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 1 20.0 967.160346038 (11.25610796929319, 10)
loss 362.7268981933594
Current State,action,reward,Response time,Next State:  (10, 11.25610796929319) 3 19.0 974.730436685 (11.027107764209074, 11)
loss 360.3388366699219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 361.1263122558594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 363.0010070800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 361.89239501953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 362.60302734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 364.6080627441406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 365.8066101074219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 363.14935302734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 361.08087158203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 364.508544921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 362.6782531738281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 364.3752746582031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 362.3215026855469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 364.2087707519531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 363.42218017578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 361.0447082519531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 360.5765380859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 361.5132141113281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 363.4895324707031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 363.0215148925781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 360.043212890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 364.11383056640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 361.47393798828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 361.27264404296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 364.3226623535156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 362.1175537109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 362.8106994628906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 360.70550537109375
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 2 19.0 909.046654676 (10.236272697871373, 11)
loss 365.2579650878906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 367.11370849609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 360.16485595703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 362.45440673828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 359.2441101074219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 361.1616516113281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 362.1374206542969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 363.4073181152344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 360.7797546386719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 365.35247802734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 360.6480712890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 361.5261535644531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 360.9620361328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 362.7386779785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 364.2871398925781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 361.50604248046875
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 4 19.0 1012.73322757 (12.501496275411796, 11)
loss 360.7177429199219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 361.91619873046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 364.4883728027344
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 1 20.0 1089.37925646 (14.283719188889453, 10)
loss 363.9258728027344
Current State,action,reward,Response time,Next State:  (10, 14.283719188889453) 3 19.0 1135.32732476 (14.677479537099185, 11)
loss 362.6409606933594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 363.4892272949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 361.8487854003906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 360.1453857421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 362.180908203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 363.18060302734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 360.8427429199219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 360.4941711425781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 363.42828369140625
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 1 20.0 1339.64749699 (18.671267839956315, 10)
loss 359.5253601074219
Current State,action,reward,Response time,Next State:  (10, 18.671267839956315) 3 19.0 1368.06085906 (19.02839494033929, 11)
loss 361.0032653808594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 364.317138671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 360.5223083496094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 361.28094482421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 362.7473449707031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 361.066650390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 361.4652099609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 361.99755859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 362.5036926269531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 361.5181884765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 360.8257751464844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 362.8645935058594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 362.3585205078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 362.40411376953125
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 4 19.0 1258.07554888 (16.237094554670044, 11)
loss 363.133056640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 361.7854309082031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 362.0025939941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 361.1617431640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 361.1893005371094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 363.1148376464844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 360.21990966796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 362.41802978515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 360.8555908203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 361.1712951660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 361.185546875
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 2 19.0 1213.81868812 (16.017694914042416, 11)
loss 362.1595764160156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 360.2627868652344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 364.1888427734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 359.028564453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 362.6365661621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 360.9828186035156
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 2 19.0 1229.17115431 (16.667936385136993, 11)
loss 361.1496887207031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 361.5560302734375
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 2 19.0 1257.77263112 (16.845818065953559, 11)
loss 362.5597229003906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 360.7677307128906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 359.2980041503906
############ Running episode number: 224  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 362.1298828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 361.25396728515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 363.3213195800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 360.356201171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 362.7842712402344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 362.7568359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 361.69635009765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 363.2566223144531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 361.7623291015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 360.589111328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 362.58819580078125
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 1 20.0 939.812260006 (10.768325938188134, 10)
loss 360.95599365234375
Current State,action,reward,Response time,Next State:  (10, 10.768325938188134) 3 19.0 948.856481751 (10.772009508959538, 11)
loss 363.05975341796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 361.46173095703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 362.1445617675781
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 4 19.0 927.560809977 (10.552868829802469, 11)
loss 360.17901611328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 362.4293518066406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 362.05218505859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 363.49053955078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 363.95745849609375
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 4 19.0 919.412094444 (10.44185150623065, 11)
loss 362.10955810546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 361.9457702636719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 362.4478454589844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 363.61920166015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 361.7908020019531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 361.86712646484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 361.2347106933594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 361.6001281738281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 360.0965270996094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 360.3642578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 361.6418151855469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 361.99298095703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 365.27044677734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 362.1651611328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 363.0387878417969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 361.5792541503906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 360.1077575683594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 360.9402770996094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 362.7541809082031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 361.89239501953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 363.47344970703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 361.44476318359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 362.09039306640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 362.8541259765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 360.51702880859375
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 4 19.0 945.391786838 (11.039747673816453, 11)
loss 360.3291931152344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 362.3857727050781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 359.8214111328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 362.7913513183594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 359.81427001953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 360.1036071777344
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 2 19.0 1028.70793389 (13.168618569876575, 11)
loss 362.5006408691406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 361.9931335449219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 360.6491394042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 360.26287841796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 361.35400390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 361.0802001953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 360.62957763671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 361.1490478515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 360.97393798828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 360.3234558105469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 360.6405334472656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 359.6320495605469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 362.13690185546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 362.4626770019531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 359.7793884277344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 360.9386901855469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 361.6678161621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 361.35308837890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 361.9660339355469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 362.4080505371094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 360.018310546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 360.5799865722656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 360.91888427734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 358.90057373046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 359.3065185546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 360.89703369140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 361.87713623046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 359.63427734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 362.6410217285156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 363.2481689453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 360.6553039550781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 360.6936950683594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 359.9921569824219
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 0 21.0 1200.39231205 (15.817158911312735, 9)
loss 359.79473876953125
Current State,action,reward,Response time,Next State:  (9, 15.817158911312735) 2 21.0 1251.06775133 (15.829956988360925, 9)
loss 360.58770751953125
Current State,action,reward,Response time,Next State:  (9, 15.829956988360925) 3 20.0 1251.7376675 (15.892373986997768, 10)
loss 362.981201171875
Current State,action,reward,Response time,Next State:  (10, 15.892373986997768) 3 19.0 1220.65695786 (15.954793861767499, 11)
loss 361.8419494628906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 360.791259765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 360.90863037109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 360.6278381347656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 363.270751953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 359.2640380859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 362.4437255859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 361.0404968261719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 362.2269287109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 362.3878479003906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 363.7625427246094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 361.68572998046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 360.3004150390625
############ Running episode number: 225  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 361.7955322265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 360.5896911621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 362.5607604980469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 362.3171081542969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 360.4775085449219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 362.0194091796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 362.95654296875
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 1 20.0 950.798097136 (10.995673623987257, 10)
loss 361.81732177734375
Current State,action,reward,Response time,Next State:  (10, 10.995673623987257) 3 19.0 960.915933313 (10.931193889570471, 11)
loss 362.9914245605469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 361.373291015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 361.48785400390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 361.6226501464844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 363.0748596191406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 360.8415832519531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 362.3236389160156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 362.1498718261719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 361.0470886230469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 361.7016296386719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 361.772216796875
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 2 19.0 920.244245637 (10.433149880183072, 11)
loss 362.3374938964844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 361.1750183105469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 359.3774719238281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 362.2803955078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 361.2758483886719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 361.65985107421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 363.8680114746094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 363.7767028808594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 361.43060302734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 362.7505798339844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 360.3548889160156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 361.9806213378906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 361.6437683105469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 363.8576965332031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 361.1674499511719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 362.19482421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 363.1277160644531
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 0 21.0 916.069372847 (10.316955310454549, 9)
loss 361.6523132324219
Current State,action,reward,Response time,Next State:  (9, 10.316955310454549) 3 20.0 963.159236328 (10.333617326102203, 10)
loss 362.92694091796875
Current State,action,reward,Response time,Next State:  (10, 10.333617326102203) 3 19.0 925.797758139 (10.390165524255663, 11)
loss 361.925048828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 363.01898193359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 363.54937744140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 363.2285461425781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 360.29315185546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 361.3056335449219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 360.5549011230469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 362.3548889160156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 363.8072204589844
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 2 19.0 963.716106332 (11.670334358779868, 11)
loss 364.11773681640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 361.7645263671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 361.70086669921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 363.3173828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 362.9352722167969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 360.7734680175781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 362.400146484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 360.9527587890625
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 1 20.0 1143.69153516 (15.353965082180355, 10)
loss 362.50860595703125
Current State,action,reward,Response time,Next State:  (10, 15.353965082180355) 3 19.0 1192.09754638 (15.836943704090487, 11)
loss 362.43121337890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 360.6815490722656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 360.5981750488281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 363.5047912597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 362.910400390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 361.510498046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 364.08819580078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 360.7760009765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 362.0792236328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 362.14599609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 362.2247314453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 364.19354248046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 363.2364807128906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 362.97589111328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 359.61175537109375
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 4 19.0 1383.93777195 (19.25591252280865, 11)
loss 362.5732421875
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 1 20.0 1385.62570908 (19.08360399753829, 10)
loss 360.1836242675781
Current State,action,reward,Response time,Next State:  (10, 19.08360399753829) 3 19.0 1389.93285614 (18.668181536495972, 11)
loss 365.7261962890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 361.127197265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 361.67083740234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 360.7121887207031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 361.57623291015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 363.0679931640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 361.6393737792969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 362.5450134277344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 360.56671142578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 362.3221435546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 361.20538330078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 363.53375244140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 360.54376220703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 360.31439208984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 361.0674133300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 361.7427062988281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 360.2344665527344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 361.2566223144531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 361.6296691894531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 361.4149169921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 359.2258605957031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 361.248779296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 360.3909606933594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 361.98333740234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 359.9992370605469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 361.4478454589844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 362.22662353515625
############ Running episode number: 226  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 360.6784973144531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 361.6432800292969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 362.9778137207031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 359.70233154296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 361.8511657714844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 361.23980712890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 362.4488525390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 361.23760986328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 360.34356689453125
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 2 19.0 945.729803224 (10.816918347608043, 11)
loss 361.2527770996094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 362.31982421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 362.5928039550781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 361.31024169921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 359.7403869628906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 362.6022033691406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 362.4508972167969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 362.31182861328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 359.8560791015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 360.71954345703125
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 0 21.0 920.244245637 (10.433149880183072, 9)
loss 359.3805847167969
Current State,action,reward,Response time,Next State:  (9, 10.433149880183072) 3 20.0 969.241448633 (10.44185150623065, 10)
loss 360.90283203125
Current State,action,reward,Response time,Next State:  (10, 10.44185150623065) 3 19.0 931.538941947 (10.370942817486826, 11)
loss 361.73291015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 363.9725341796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 362.3169250488281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 361.5892639160156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 358.6662902832031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 362.34210205078125
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 0 21.0 912.494916918 (10.278181486298042, 9)
loss 361.0870666503906
Current State,action,reward,Response time,Next State:  (9, 10.278181486298042) 3 20.0 961.129617982 (10.268274366284802, 10)
loss 360.9045715332031
Current State,action,reward,Response time,Next State:  (10, 10.268274366284802) 3 19.0 922.331700166 (10.335411397720526, 11)
loss 361.9248046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 358.9857177734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 363.01434326171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 359.27239990234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 361.1016845703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 360.6275939941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 360.650146484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 364.1148376464844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 362.4811706542969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 361.47552490234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 361.8575134277344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 363.1142883300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 363.8050537109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 361.53582763671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 361.5450439453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 360.93328857421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 360.6365051269531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 361.44403076171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 361.44720458984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 361.91204833984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 360.97589111328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 360.36328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 360.9862976074219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 359.5376281738281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 361.0748596191406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 361.8343811035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 361.8948974609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 362.518310546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 365.1832275390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 362.85919189453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 364.0168762207031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 360.5912780761719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 359.5568542480469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 361.1836242675781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 360.4744567871094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 361.31939697265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 362.0473327636719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 360.0872802734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 359.791748046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 359.518798828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 362.5191955566406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 361.22674560546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 362.6484069824219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 361.4084167480469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 360.94921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 361.58428955078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 363.3237609863281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 362.9947814941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 362.78851318359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 361.5899658203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 360.9695739746094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 360.59210205078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 360.5792236328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 362.261474609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 363.0419006347656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 362.3453063964844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 361.72662353515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 361.00164794921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 361.19268798828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 359.8135986328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 361.6814880371094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 362.73046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 362.6532897949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 362.3740234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 360.196533203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 362.2225036621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 361.0426025390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 362.1883239746094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 362.5657043457031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 362.9684143066406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 360.7447204589844
############ Running episode number: 227  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 359.6956787109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 360.98126220703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 360.4461669921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 361.3255615234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 359.6328430175781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 361.812255859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 363.3268127441406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 362.27886962890625
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 0 21.0 949.137050055 (10.931193889570471, 9)
loss 361.1312255859375
Current State,action,reward,Response time,Next State:  (9, 10.931193889570471) 3 20.0 995.311594677 (10.816918347608043, 10)
loss 362.3002014160156
Current State,action,reward,Response time,Next State:  (10, 10.816918347608043) 3 19.0 951.434021987 (10.819208572963639, 11)
loss 362.30450439453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 360.4357604980469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 359.9443664550781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 361.8287048339844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 360.8920593261719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 362.8767395019531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 360.9349365234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 363.69989013671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 362.4145202636719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 363.0862731933594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 361.76904296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 361.400390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 363.00079345703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 362.0148010253906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 362.20062255859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 363.8128356933594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 361.96185302734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 361.6134338378906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 361.67425537109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 360.8515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 361.9327392578125
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 2 19.0 912.67468196 (10.24826025489064, 11)
loss 363.3108825683594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 361.35467529296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 360.6756591796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 361.8299865722656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 363.2220458984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 359.7149353027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 361.0617370605469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 362.9062194824219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 360.8593444824219
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 1 20.0 919.032945938 (10.546025383098053, 10)
loss 361.60174560546875
Action +2 not possible so Scaled up by 1
Current State,action,reward,Response time,Next State:  (10, 10.546025383098053) 4 19.0 937.064750655 (10.655373370049301, 11)
loss 361.851318359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 361.89404296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 361.8304138183594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 363.4075012207031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 360.5644226074219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 360.8605651855469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 360.6138610839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 363.19927978515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 361.94171142578125
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 2 19.0 1012.73322757 (12.501496275411796, 11)
loss 360.91497802734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 360.6335144042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 362.0711975097656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 362.02813720703125
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 2 19.0 1122.88439768 (14.677479537099185, 11)
loss 361.6114807128906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 360.0721740722656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 359.17724609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 362.00537109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 360.9422302246094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 361.1720275878906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 361.0192565917969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 361.2933349609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 360.9021911621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 361.0210876464844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 361.3490295410156
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 1 20.0 1373.60319427 (19.286321916040979, 10)
loss 361.2398986816406
Current State,action,reward,Response time,Next State:  (10, 19.286321916040979) 3 19.0 1400.68584406 (19.340464848017284, 11)
loss 361.61004638671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 360.7481689453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 361.8211669921875
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 0 21.0 1379.54110953 (19.385636054792762, 9)
loss 362.34716796875
Current State,action,reward,Response time,Next State:  (9, 19.385636054792762) 3 20.0 1437.85991935 (19.223969507401588, 10)
loss 361.48785400390625
Current State,action,reward,Response time,Next State:  (10, 19.223969507401588) 3 19.0 1397.37841716 (19.25591252280865, 11)
loss 361.71453857421875
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 0 21.0 1385.62570908 (19.08360399753829, 9)
loss 362.3537902832031
Current State,action,reward,Response time,Next State:  (9, 19.08360399753829) 3 20.0 1422.05003169 (18.668181536495972, 10)
loss 360.5224609375
Current State,action,reward,Response time,Next State:  (10, 18.668181536495972) 3 19.0 1367.89714889 (18.375894992990247, 11)
loss 360.2882385253906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 359.8103332519531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 361.7935485839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 361.40765380859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 361.68365478515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 361.702392578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 362.104736328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 360.22015380859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 360.5083312988281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 362.0196838378906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 361.1783447265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 360.150146484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 360.9379577636719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 362.2369384765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 363.370849609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 364.2751159667969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 362.4596252441406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 361.2071533203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 360.88623046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 360.55377197265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 361.2624816894531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 362.2257385253906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 360.7520446777344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 360.8739929199219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 362.908203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 362.9563293457031
############ Running episode number: 228  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 360.656982421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 365.8778381347656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 360.557373046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 361.7604064941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 359.9624328613281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 362.2484130859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 362.989990234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 360.7997131347656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 361.45684814453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 361.60711669921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 362.68572998046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 359.7354736328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 360.8313293457031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 359.5615234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 361.98956298828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 361.2415771484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 361.895751953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 363.0857238769531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 362.7445068359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 362.25616455078125
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 2 19.0 919.412094444 (10.44185150623065, 11)
loss 360.44171142578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 361.5133056640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 359.89471435546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 361.44378662109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 362.398193359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 360.639892578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 361.54278564453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 360.0921936035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 364.0220031738281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 361.5163879394531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 362.72216796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 362.50762939453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 360.1188659667969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 362.32244873046875
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 1 20.0 909.046654676 (10.236272697871373, 10)
loss 363.70208740234375
Current State,action,reward,Response time,Next State:  (10, 10.236272697871373) 3 19.0 920.634200723 (10.369891240151098, 11)
loss 362.969482421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 360.1831970214844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 361.185791015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 362.43035888671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 363.7054138183594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 364.58135986328125
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 2 19.0 925.376677007 (10.655373370049301, 11)
loss 361.5523376464844
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 4 19.0 931.154858096 (10.624473674922116, 11)
loss 361.61224365234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 360.75018310546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 359.97296142578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 361.4004211425781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 359.7667541503906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 360.3108215332031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 361.5582580566406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 360.43157958984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 362.16094970703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 365.0048522949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 361.7073974609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 361.66986083984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 359.2633972167969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 360.9543151855469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 360.8856201171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 362.7572326660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 362.50823974609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 361.8343505859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 362.37176513671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 362.026611328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 360.2200927734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 360.02825927734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 359.5487976074219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 362.5482177734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 360.4215087890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 361.0338439941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 361.20111083984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 360.5673828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 360.4899597167969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 359.6016540527344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 362.39691162109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 361.2308654785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 361.228515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 361.9046936035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 362.387451171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 361.0296630859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 362.1170959472656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 360.6446228027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 363.02532958984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 362.8216552734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 360.71612548828125
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 1 20.0 1184.33851965 (15.750501603468638, 10)
loss 362.0295715332031
Current State,action,reward,Response time,Next State:  (10, 15.750501603468638) 3 19.0 1213.1314661 (15.817158911312735, 11)
loss 361.483642578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 361.4943542480469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 360.9759826660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 361.0555419921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 364.6634826660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 360.3130798339844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 359.7105407714844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 360.9482421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 361.4558410644531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 361.4720153808594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 362.31427001953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 360.2261657714844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 358.9993591308594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 362.6761474609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 361.62939453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 360.86199951171875
############ Running episode number: 229  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 359.9808654785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 363.84625244140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 359.4524230957031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 360.7768249511719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 361.2749328613281
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 4 19.0 967.160346038 (11.25610796929319, 11)
loss 361.08673095703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 359.1995849609375
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 0 21.0 950.798097136 (10.995673623987257, 9)
loss 361.4330139160156
Current State,action,reward,Response time,Next State:  (9, 10.995673623987257) 0 23.0 998.686790566 (10.931193889570471, 7)
loss 361.5337219238281
Current State,action,reward,Response time,Next State:  (7, 10.931193889570471) 1 24.0 1057.02541913 (10.816918347608043, 6)
loss 361.36798095703125
Current State,action,reward,Response time,Next State:  (6, 10.816918347608043) 3 23.0 1112.82017919 (10.819208572963639, 7)
loss 363.7425231933594
Current State,action,reward,Response time,Next State:  (7, 10.819208572963639) 3 22.0 1050.04686027 (10.768325938188134, 8)
loss 360.23907470703125
Current State,action,reward,Response time,Next State:  (8, 10.768325938188134) 3 21.0 1000.39608195 (10.772009508959538, 9)
loss 361.8619384765625
Current State,action,reward,Response time,Next State:  (9, 10.772009508959538) 3 20.0 986.979077927 (10.644925616761762, 10)
loss 360.8237609863281
Current State,action,reward,Response time,Next State:  (10, 10.644925616761762) 0 22.0 942.310823749 (10.58735855349979, 8)
loss 367.39697265625
Current State,action,reward,Response time,Next State:  (8, 10.58735855349979) 0 24.0 989.819480251 (10.552868829802469, 6)
loss 363.210693359375
Current State,action,reward,Response time,Next State:  (6, 10.552868829802469) 3 23.0 1095.35618014 (10.553846649940214, 7)
loss 361.00958251953125
Current State,action,reward,Response time,Next State:  (7, 10.553846649940214) 3 22.0 1033.5103727 (10.489125480251131, 8)
loss 361.4095764160156
Current State,action,reward,Response time,Next State:  (8, 10.489125480251131) 3 21.0 984.078268423 (10.448897752470936, 9)
loss 362.5628967285156
Current State,action,reward,Response time,Next State:  (9, 10.448897752470936) 3 20.0 970.065772031 (10.433149880183072, 10)
loss 363.6995544433594
Current State,action,reward,Response time,Next State:  (10, 10.433149880183072) 3 19.0 931.077372094 (10.44185150623065, 11)
loss 363.10040283203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 362.5087585449219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 363.3680114746094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 362.8929748535156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 362.02703857421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 362.0475769042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 360.55303955078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 360.7432556152344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 364.33782958984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 362.1531066894531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 362.9736633300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 364.611328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 363.24261474609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 367.5484313964844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 364.19903564453125
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 0 21.0 909.008683798 (10.369891240151098, 9)
loss 363.9130554199219
Current State,action,reward,Response time,Next State:  (9, 10.369891240151098) 3 20.0 965.930171009 (10.316955310454549, 10)
loss 362.1785888671875
Current State,action,reward,Response time,Next State:  (10, 10.316955310454549) 3 19.0 924.913936648 (10.333617326102203, 11)
loss 362.535888671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 363.41357421875
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 4 19.0 917.140709305 (10.425974763084863, 11)
loss 361.2727355957031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 363.0917053222656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 364.7482604980469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 365.8271789550781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 359.82098388671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 362.28741455078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 360.8133239746094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 362.8193359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 361.25701904296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 363.8311462402344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 363.38153076171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 363.4197998046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 366.6518249511719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 361.3975830078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 363.4313659667969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 362.76641845703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 362.2370910644531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 362.2175598144531
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 4 19.0 1204.9600972 (16.466876895473597, 11)
loss 366.0332946777344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 365.29168701171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 364.5440368652344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 363.6471862792969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 361.16229248046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 362.1037292480469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 363.6712951660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 361.938720703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 364.068603515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 360.9532165527344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 366.6235046386719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 366.0279846191406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 361.3790588378906
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 4 19.0 1392.48057747 (19.223969507401588, 11)
loss 360.679931640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 359.9744567871094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 364.3482360839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 361.3104248046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 360.7278137207031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 365.2359313964844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 361.5271911621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 361.4061584472656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 368.1507568359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 360.5484619140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 367.4976806640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 364.4830322265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 362.4306945800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 363.3984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 364.5048522949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 362.1689453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 363.3983459472656
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 0 21.0 1207.88915169 (15.954793861767499, 9)
loss 368.71929931640625
Current State,action,reward,Response time,Next State:  (9, 15.954793861767499) 3 20.0 1258.27226176 (16.004586266677634, 10)
loss 360.48419189453125
Action +2 not possible so Scaled up by 1
Current State,action,reward,Response time,Next State:  (10, 16.004586266677634) 4 19.0 1226.60915635 (16.017694914042416, 11)
loss 363.0915832519531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 362.2597961425781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 361.26971435546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 361.0321044921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 361.3519287109375
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 4 19.0 1225.69057988 (16.295120821876548, 11)
loss 361.8735046386719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 362.570556640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 361.8754577636719
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 4 19.0 1257.77263112 (16.845818065953559, 11)
loss 366.4539794921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 361.177734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 363.55517578125
############ Running episode number: 230  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 361.98126220703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 364.6718444824219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 362.9388122558594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 365.7213134765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 363.6781311035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 367.29327392578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 363.06207275390625
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 0 21.0 950.798097136 (10.995673623987257, 9)
loss 363.4180603027344
Current State,action,reward,Response time,Next State:  (9, 10.995673623987257) 3 20.0 998.686790566 (10.931193889570471, 10)
loss 363.87164306640625
Current State,action,reward,Response time,Next State:  (10, 10.931193889570471) 1 21.0 957.495664348 (10.816918347608043, 9)
loss 360.7521057128906
Current State,action,reward,Response time,Next State:  (9, 10.816918347608043) 3 20.0 989.329834005 (10.819208572963639, 10)
loss 365.705078125
Current State,action,reward,Response time,Next State:  (10, 10.819208572963639) 3 19.0 951.555504911 (10.768325938188134, 11)
loss 360.8128356933594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 363.9847106933594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 360.7104187011719
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 0 21.0 930.602776506 (10.58735855349979, 9)
loss 362.21533203125
Current State,action,reward,Response time,Next State:  (9, 10.58735855349979) 3 20.0 977.313511661 (10.552868829802469, 10)
loss 363.0382385253906
Current State,action,reward,Response time,Next State:  (10, 10.552868829802469) 3 19.0 937.427755072 (10.553846649940214, 11)
loss 363.4640197753906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 360.6815185546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 362.6670227050781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 360.6866760253906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 362.0359191894531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 360.85076904296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 361.572265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 367.42901611328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 362.79052734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 362.83056640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 363.49481201171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 360.89190673828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 364.1506042480469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 362.4291687011719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 367.3399963378906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 365.7184753417969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 362.70947265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 361.07391357421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 366.779052734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 364.7203063964844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 363.3161315917969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 361.6301574707031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 362.2266845703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 360.2593078613281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 361.0824279785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 361.99920654296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 362.2312316894531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 362.04278564453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 361.0410461425781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 362.52288818359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 364.1716613769531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 362.4482116699219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 363.52325439453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 361.8506774902344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 363.4007263183594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 361.2765197753906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 363.83612060546875
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 0 21.0 1089.37925646 (14.283719188889453, 9)
loss 363.21392822265625
Current State,action,reward,Response time,Next State:  (9, 14.283719188889453) 3 20.0 1170.79974938 (14.677479537099185, 10)
loss 365.4324645996094
Current State,action,reward,Response time,Next State:  (10, 14.677479537099185) 3 19.0 1156.21398489 (15.353965082180355, 11)
loss 362.3251037597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 364.7715759277344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 361.1953430175781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 360.048583984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 363.82330322265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 363.24090576171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 363.3051452636719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 362.01153564453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 364.12652587890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 365.58148193359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 367.99755859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 363.06494140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 365.5157470703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 363.25457763671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 361.89141845703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 365.0782775878906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 362.35858154296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 365.6820983886719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 363.1009826660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 365.1412658691406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 362.0094909667969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 365.85418701171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 364.7641906738281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 362.66632080078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 365.3626403808594
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 4 19.0 1210.97093797 (15.828704162850809, 11)
loss 365.1471252441406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 361.828857421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 365.3551940917969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 361.6114807128906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 362.5733337402344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 364.77301025390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 363.2987060546875
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 1 20.0 1207.88915169 (15.954793861767499, 10)
loss 364.06109619140625
Current State,action,reward,Response time,Next State:  (10, 15.954793861767499) 3 19.0 1223.96796344 (16.004586266677634, 11)
loss 361.5865783691406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 363.909912109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 362.60137939453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 364.2339782714844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 362.1026306152344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 362.673583984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 361.77264404296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 361.5643615722656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 361.6256408691406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 362.6230773925781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 363.7133483886719
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 4 19.0 1269.21706044 (17.215992726625572, 11)
loss 362.9427795410156
############ Running episode number: 231  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 364.46893310546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 361.6647644042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 359.807373046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 363.2545166015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 361.5715026855469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 361.39801025390625
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 0 21.0 962.898956888 (11.027107764209074, 9)
loss 364.93695068359375
Current State,action,reward,Response time,Next State:  (9, 11.027107764209074) 3 20.0 1000.33221268 (10.995673623987257, 10)
loss 361.1111145019531
Current State,action,reward,Response time,Next State:  (10, 10.995673623987257) 3 19.0 960.915933313 (10.931193889570471, 11)
loss 363.4518737792969
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 4 19.0 945.729803224 (10.816918347608043, 11)
loss 366.4970397949219
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 4 19.0 939.691239608 (10.819208572963639, 11)
loss 360.6108703613281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 359.94195556640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 362.5517883300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 365.4571533203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 363.1506652832031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 362.1419372558594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 362.809814453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 361.0802001953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 364.9563903808594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 362.3390808105469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 365.3551025390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 362.6595153808594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 363.5135192871094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 366.21929931640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 365.76446533203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 360.7409362792969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 361.9509582519531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 363.16131591796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 362.4683532714844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 362.6585998535156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 363.5121154785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 365.974609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 365.3074645996094
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 2 19.0 911.133954163 (10.236991269871366, 11)
loss 361.83721923828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 364.3320617675781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 361.6227722167969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 360.7414855957031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 363.3178405761719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 362.459228515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 361.77398681640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 363.6688232421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 363.8772888183594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 364.5805358886719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 362.47491455078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 364.830322265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 361.4527587890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 366.63525390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 365.99908447265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 359.922607421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 360.8584899902344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 364.11737060546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 365.4427185058594
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 0 21.0 1063.96010023 (13.649658108197247, 9)
loss 364.5030212402344
Current State,action,reward,Response time,Next State:  (9, 13.649658108197247) 3 20.0 1137.6097809 (14.283719188889453, 10)
loss 362.4271240234375
Current State,action,reward,Response time,Next State:  (10, 14.283719188889453) 3 19.0 1135.32732476 (14.677479537099185, 11)
loss 363.13348388671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 361.3155822753906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 364.6185607910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 362.68365478515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 361.436767578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 361.2241516113281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 367.7925109863281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 365.0651550292969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 361.7123107910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 365.1358947753906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 363.63372802734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 362.2689514160156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 361.2282409667969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 363.68475341796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 363.7391662597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 364.7148132324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 363.3870849609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 363.18328857421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 363.3717346191406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 364.1180725097656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 363.351806640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 363.8600769042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 362.4288024902344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 360.81280517578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 361.5242004394531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 361.6866149902344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 364.72088623046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 362.5915222167969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 362.11541748046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 367.1283264160156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 362.4439392089844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 363.3321838378906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 361.2811279296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 366.004638671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 363.622802734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 362.6416320800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 362.3189392089844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 364.145263671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 364.6173400878906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 362.0449523925781
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 0 21.0 1225.69057988 (16.295120821876548, 9)
loss 362.9907531738281
Current State,action,reward,Response time,Next State:  (9, 16.295120821876548) 3 20.0 1276.0866986 (16.667936385136993, 10)
loss 360.36981201171875
Current State,action,reward,Response time,Next State:  (10, 16.667936385136993) 3 19.0 1261.79596106 (16.836383524612351, 11)
loss 362.3138427734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 362.3416442871094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 364.948486328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 363.741943359375
############ Running episode number: 232  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 361.78228759765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 367.7969665527344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 364.570068359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 361.9683837890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 365.2679138183594
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 4 19.0 967.160346038 (11.25610796929319, 11)
loss 364.2989196777344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 362.7518310546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 366.52783203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 360.851318359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 364.3749084472656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 360.14508056640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 362.3656311035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 362.1009216308594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 364.9620056152344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 361.04296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 361.6201477050781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 364.2172546386719
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 0 21.0 925.789969445 (10.489125480251131, 9)
loss 362.57489013671875
Current State,action,reward,Response time,Next State:  (9, 10.489125480251131) 3 20.0 972.171495057 (10.448897752470936, 10)
loss 368.12042236328125
Current State,action,reward,Response time,Next State:  (10, 10.448897752470936) 3 19.0 931.912703681 (10.433149880183072, 11)
loss 366.14154052734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 363.3974609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 366.5501403808594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 364.7011413574219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 364.0956115722656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 365.76702880859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 366.8228454589844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 361.00054931640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 363.44189453125
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 1 20.0 911.223233653 (10.268274366284802, 10)
loss 369.3564147949219
Current State,action,reward,Response time,Next State:  (10, 10.268274366284802) 3 19.0 922.331700166 (10.335411397720526, 11)
loss 364.7273254394531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 362.7524719238281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 364.2347717285156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 364.72314453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 360.8009033203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 368.0472106933594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 363.0470275878906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 362.90216064453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 365.9532470703125
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 0 21.0 914.152581784 (10.390165524255663, 9)
loss 361.7461242675781
Current State,action,reward,Response time,Next State:  (9, 10.390165524255663) 3 20.0 966.991429728 (10.425974763084863, 10)
loss 361.8514099121094
Current State,action,reward,Response time,Next State:  (10, 10.425974763084863) 3 19.0 930.696774523 (10.546025383098053, 11)
loss 360.80010986328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 361.6454772949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 361.2749328613281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 360.5412902832031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 360.875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 361.0643310546875
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 2 19.0 951.466016946 (11.271571944085663, 11)
loss 363.5
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 360.4589538574219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 363.85198974609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 362.62677001953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 360.96685791015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 361.5458068847656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 362.40032958984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 361.78863525390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 361.8038635253906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 361.41107177734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 362.9319152832031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 364.8910827636719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 361.0495910644531
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 0 21.0 1259.63387034 (17.534967586021782, 9)
loss 360.5331115722656
Current State,action,reward,Response time,Next State:  (9, 17.534967586021782) 3 20.0 1340.98655806 (17.669285735563751, 10)
loss 361.7789611816406
Current State,action,reward,Response time,Next State:  (10, 17.669285735563751) 3 19.0 1314.91162813 (17.944480812078613, 11)
loss 360.5772399902344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 363.28387451171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 367.0862731933594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 363.2150573730469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 364.78094482421875
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 2 19.0 1387.23260634 (19.340464848017284, 11)
loss 360.47760009765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 361.94085693359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 361.6073303222656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 360.87579345703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 361.3114013671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 360.3836669921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 363.23785400390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 360.9764709472656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 364.7010803222656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 364.2645568847656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 362.3841247558594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 362.086181640625
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 1 20.0 1258.07554888 (16.237094554670044, 10)
loss 362.5886535644531
Current State,action,reward,Response time,Next State:  (10, 16.237094554670044) 3 19.0 1238.94234737 (15.950694610794756, 11)
loss 362.72967529296875
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 2 19.0 1210.97093797 (15.828704162850809, 11)
loss 364.4903259277344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 361.4524841308594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 363.0796203613281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 364.03790283203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 359.8423156738281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 360.9534606933594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 362.3154296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 362.597900390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 364.94097900390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 361.4249267578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 362.21563720703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 361.6429443359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 366.9379577636719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 363.3463439941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 365.1669616699219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 365.776611328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 365.9919738769531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 367.07769775390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 362.3851623535156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 363.6880798339844
############ Running episode number: 233  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 361.5523986816406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 362.6739196777344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 363.3234558105469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 362.45806884765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 362.1851806640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 361.834716796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 363.9915771484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 361.2342224121094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 361.1391906738281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 361.5697326660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 364.2398986816406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 360.3690490722656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 365.73077392578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 360.63323974609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 364.77288818359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 362.112548828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 362.22222900390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 361.7395324707031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 362.9100036621094
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 0 21.0 920.244245637 (10.433149880183072, 9)
loss 359.9439392089844
Current State,action,reward,Response time,Next State:  (9, 10.433149880183072) 3 20.0 969.241448633 (10.44185150623065, 10)
loss 366.0382080078125
Current State,action,reward,Response time,Next State:  (10, 10.44185150623065) 3 19.0 931.538941947 (10.370942817486826, 11)
loss 365.9264831542969
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 1 20.0 916.124940439 (10.42733414151318, 10)
loss 364.6020812988281
Current State,action,reward,Response time,Next State:  (10, 10.42733414151318) 3 19.0 930.768881517 (10.388469398680568, 11)
loss 361.88232421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 359.68048095703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 364.1114501953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 363.58111572265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 364.2554931640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 369.81170654296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 364.4150695800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 361.3407897949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 360.7845764160156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 366.81988525390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 359.999755859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 362.24871826171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 363.2515869140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 365.8412170410156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 363.5123291015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 365.2570495605469
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 0 21.0 917.140709305 (10.425974763084863, 9)
loss 364.30426025390625
Current State,action,reward,Response time,Next State:  (9, 10.425974763084863) 3 20.0 968.865866662 (10.546025383098053, 10)
loss 362.0050048828125
Current State,action,reward,Response time,Next State:  (10, 10.546025383098053) 3 19.0 937.064750655 (10.655373370049301, 11)
loss 363.99554443359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 364.37261962890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 367.6041564941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 362.04022216796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 361.91571044921875
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 4 19.0 951.466016946 (11.271571944085663, 11)
loss 362.38262939453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 362.529541015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 362.80999755859375
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 1 20.0 992.681522335 (12.19918626616789, 10)
loss 362.90936279296875
Current State,action,reward,Response time,Next State:  (10, 12.19918626616789) 3 19.0 1024.75516863 (12.501496275411796, 11)
loss 365.6585388183594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 361.4189453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 365.7703552246094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 360.3312072753906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 363.4259338378906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 364.9294738769531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 362.16290283203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 363.18280029296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 366.1163635253906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 362.5696105957031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 363.9897766113281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 362.1908874511719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 364.68475341796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 363.2066955566406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 364.37823486328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 365.77728271484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 360.49407958984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 362.5372009277344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 362.2796325683594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 367.6026916503906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 362.484130859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 363.37451171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 364.53302001953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 361.3408203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 363.6481628417969
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 1 20.0 1339.12370397 (17.82724819986867, 10)
loss 363.1883544921875
Current State,action,reward,Response time,Next State:  (10, 17.82724819986867) 3 19.0 1323.29060362 (17.229782241685768, 11)
loss 361.3768005371094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 363.6666564941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 363.7549133300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 362.3022766113281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 360.6927490234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 367.0282897949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 362.5881042480469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 364.1630554199219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 362.9446716308594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 365.92083740234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 362.6597900390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 363.4215087890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 360.5505065917969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 363.8663635253906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 364.00592041015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 366.3050537109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 366.70001220703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 366.3931884765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 365.8923645019531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 363.7683410644531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 365.49554443359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 363.012451171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 366.0434875488281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 367.0253601074219
############ Running episode number: 234  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 363.1138610839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 365.3872375488281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 362.60919189453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 363.5251159667969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 365.3276672363281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 364.12646484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 361.4839172363281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 362.1393127441406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 362.3357849121094
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 0 21.0 945.729803224 (10.816918347608043, 9)
loss 366.5065002441406
Current State,action,reward,Response time,Next State:  (9, 10.816918347608043) 3 20.0 989.329834005 (10.819208572963639, 10)
loss 360.5346374511719
Current State,action,reward,Response time,Next State:  (10, 10.819208572963639) 3 19.0 951.555504911 (10.768325938188134, 11)
loss 365.214599609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 364.15380859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 363.0023498535156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 361.4209289550781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 363.71038818359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 362.091552734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 361.4818115234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 364.30572509765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 367.7930603027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 362.3576965332031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 361.13916015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 362.1750793457031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 364.191650390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 365.1529235839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 363.5686950683594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 365.9692077636719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 364.4259338378906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 365.2979431152344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 366.2691650390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 365.7586669921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 363.1458435058594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 366.4861145019531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 365.1732482910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 362.7409973144531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 362.1439514160156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 362.79681396484375
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 4 19.0 913.272125304 (10.333617326102203, 11)
loss 360.3909912109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 367.9215393066406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 365.5237121582031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 364.6065979003906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 365.2250671386719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 365.2995300292969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 360.53167724609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 361.62481689453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 364.66192626953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 364.82733154296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 363.4985046386719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 367.0212097167969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 359.1241149902344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 359.5447998046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 367.34136962890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 362.5792541503906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 362.5238952636719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 361.21112060546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 363.3436584472656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 366.38861083984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 362.6764831542969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 362.67236328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 361.94921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 364.7481384277344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 365.07501220703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 363.9638671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 362.47515869140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 366.7461242675781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 362.7884521484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 361.5672912597656
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 0 21.0 1390.09363446 (19.213467265587269, 9)
loss 364.4783935546875
Current State,action,reward,Response time,Next State:  (9, 19.213467265587269) 3 20.0 1428.84773289 (19.140765783401285, 10)
loss 359.57366943359375
Current State,action,reward,Response time,Next State:  (10, 19.140765783401285) 3 19.0 1392.96495117 (19.385636054792762, 11)
loss 363.62799072265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 363.15863037109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 363.2740173339844
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 2 19.0 1385.62570908 (19.08360399753829, 11)
loss 364.6028747558594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 362.3278503417969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 364.79583740234375
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 4 19.0 1339.12370397 (17.82724819986867, 11)
loss 365.4657287597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 362.34759521484375
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 2 19.0 1278.56065924 (16.84211602880065, 11)
loss 361.15869140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 368.7012634277344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 363.336669921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 364.8831481933594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 362.01885986328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 365.622314453125
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 4 19.0 1184.33851965 (15.750501603468638, 11)
loss 361.4798583984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 361.9485168457031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 366.4955749511719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 360.3572082519531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 362.93829345703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 360.3661193847656
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 4 19.0 1213.81868812 (16.017694914042416, 11)
loss 366.4999084472656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 363.75433349609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 361.4262390136719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 364.1947937011719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 362.3514099121094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 363.7352294921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 364.39068603515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 360.7394104003906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 361.4877624511719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 362.7549743652344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 360.34808349609375
############ Running episode number: 235  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 363.6360168457031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 367.6864318847656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 369.94171142578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 360.8938293457031
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 0 21.0 974.154538113 (11.336751742492702, 9)
loss 366.477783203125
Current State,action,reward,Response time,Next State:  (9, 11.336751742492702) 3 20.0 1016.54054685 (11.25610796929319, 10)
loss 363.1465759277344
Current State,action,reward,Response time,Next State:  (10, 11.25610796929319) 3 19.0 974.730436685 (11.027107764209074, 11)
loss 361.8300476074219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 366.1806640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 362.87237548828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 364.6918640136719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 359.5403747558594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 364.22760009765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 362.5718688964844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 364.3566589355469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 366.4167175292969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 366.760986328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 361.2282409667969
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 1 20.0 925.789969445 (10.489125480251131, 10)
loss 362.15008544921875
Current State,action,reward,Response time,Next State:  (10, 10.489125480251131) 3 19.0 934.046546974 (10.448897752470936, 11)
loss 361.46820068359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 361.7701110839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 362.35614013671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 365.2198181152344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 363.0173034667969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 362.7034606933594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 359.9835510253906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 362.08349609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 366.1328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 364.3570861816406
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 1 20.0 911.223233653 (10.268274366284802, 10)
loss 362.0867919921875
Current State,action,reward,Response time,Next State:  (10, 10.268274366284802) 3 19.0 922.331700166 (10.335411397720526, 11)
loss 363.8550109863281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 361.61273193359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 363.8020324707031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 366.22503662109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 362.4784851074219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 363.3671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 365.0531005859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 364.9890441894531
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 4 19.0 913.272125304 (10.333617326102203, 11)
loss 360.2557678222656
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 2 19.0 914.152581784 (10.390165524255663, 11)
loss 365.3476257324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 363.9234619140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 365.88916015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 363.1986389160156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 361.8089294433594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 362.1971740722656
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 4 19.0 937.284736847 (10.924797168745895, 11)
loss 364.5491027832031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 360.82318115234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 363.9288024902344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 364.3292236328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 362.14410400390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 365.3522644042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 361.33349609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 361.03851318359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 359.7457580566406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 360.4477844238281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 365.5096740722656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 362.5234375
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 0 21.0 1179.43847566 (15.836943704090487, 9)
loss 362.24176025390625
Current State,action,reward,Response time,Next State:  (9, 15.836943704090487) 3 20.0 1252.10338759 (16.466876895473597, 10)
loss 363.1654052734375
Current State,action,reward,Response time,Next State:  (10, 16.466876895473597) 3 19.0 1251.130943 (16.871606159345866, 11)
loss 363.71954345703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 362.69085693359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 362.4903869628906
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 4 19.0 1301.78496219 (17.944480812078613, 11)
loss 364.6085205078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 360.9564514160156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 362.560791015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 361.273681640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 363.0201110839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 364.39227294921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 363.0443115234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 360.2796630859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 362.838134765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 361.1724853515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 361.7362365722656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 363.3403015136719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 361.09112548828125
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 0 21.0 1354.56874896 (18.375894992990247, 9)
loss 360.8528747558594
Current State,action,reward,Response time,Next State:  (9, 18.375894992990247) 3 20.0 1385.00495784 (17.82724819986867, 10)
loss 367.4017333984375
Current State,action,reward,Response time,Next State:  (10, 17.82724819986867) 3 19.0 1323.29060362 (17.229782241685768, 11)
loss 361.7858581542969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 361.53424072265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 366.3763427734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 364.5780029296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 365.02789306640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 360.20404052734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 359.8185729980469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 361.46441650390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 361.308349609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 362.8664855957031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 363.1170959472656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 361.7471618652344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 364.0873107910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 366.24407958984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 361.4306640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 363.0899963378906
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 2 19.0 1219.63501821 (16.147078378791146, 11)
loss 362.9813232421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 363.8988037109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 363.3193054199219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 363.6839294433594
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 1 20.0 1248.87152463 (16.836383524612351, 10)
loss 366.57635498046875
Current State,action,reward,Response time,Next State:  (10, 16.836383524612351) 3 19.0 1270.73108663 (16.845818065953559, 11)
loss 364.3241882324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 361.9861145019531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 362.25244140625
############ Running episode number: 236  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 364.4845886230469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 364.0684814453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 361.4250793457031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 365.5491943359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 362.8406982421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 366.5264587402344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 364.8354187011719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 361.5709228515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 362.516845703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 363.74896240234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 369.94268798828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 365.3573913574219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 362.45684814453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 367.64739990234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 362.4059143066406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 364.8612365722656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 361.50616455078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 362.52947998046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 363.4325256347656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 363.9147644042969
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 4 19.0 919.412094444 (10.44185150623065, 11)
loss 361.27447509765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 362.7132873535156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 362.13507080078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 364.30670166015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 363.0411071777344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 366.1473693847656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 365.2710266113281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 364.633544921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 365.38665771484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 362.51361083984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 361.97344970703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 360.7370300292969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 362.08349609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 364.88641357421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 364.55950927734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 363.1226501464844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 361.6343994140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 362.13397216796875
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 0 21.0 914.152581784 (10.390165524255663, 9)
loss 365.1539306640625
Current State,action,reward,Response time,Next State:  (9, 10.390165524255663) 3 20.0 966.991429728 (10.425974763084863, 10)
loss 362.91717529296875
Current State,action,reward,Response time,Next State:  (10, 10.425974763084863) 3 19.0 930.696774523 (10.546025383098053, 11)
loss 361.0067138671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 365.73272705078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 362.12652587890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 362.3183898925781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 362.095458984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 364.7410888671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 365.2010192871094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 365.2410583496094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 361.87127685546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 363.90081787109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 366.64190673828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 363.9064636230469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 364.4615478515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 362.78179931640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 362.2096862792969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 360.9755859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 364.8518981933594
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 2 19.0 1204.9600972 (16.466876895473597, 11)
loss 364.939697265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 365.5609130859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 364.98779296875
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 1 20.0 1294.6873044 (17.669285735563751, 10)
loss 364.07391357421875
Current State,action,reward,Response time,Next State:  (10, 17.669285735563751) 3 19.0 1314.91162813 (17.944480812078613, 11)
loss 362.8186950683594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 363.19158935546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 362.6755065917969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 363.7306823730469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 363.7021179199219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 364.12548828125
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 4 19.0 1390.09363446 (19.213467265587269, 11)
loss 364.7309875488281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 366.3262023925781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 365.6442565917969
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 4 19.0 1392.48057747 (19.223969507401588, 11)
loss 361.0172424316406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 363.0635681152344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 365.3785400390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 362.8213195800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 363.9389343261719
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 4 19.0 1339.12370397 (17.82724819986867, 11)
loss 361.980712890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 366.8411865234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 362.6940002441406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 362.1544189453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 363.3006591796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 363.027587890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 365.9764404296875
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 4 19.0 1189.84140354 (15.446694946204717, 11)
loss 360.1561584472656
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 1 20.0 1184.33851965 (15.750501603468638, 10)
loss 360.0371398925781
Current State,action,reward,Response time,Next State:  (10, 15.750501603468638) 3 19.0 1213.1314661 (15.817158911312735, 11)
loss 364.3965148925781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 362.4305419921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 363.4783630371094
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 2 19.0 1207.88915169 (15.954793861767499, 11)
loss 362.4985656738281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 364.25372314453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 364.285400390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 362.2401123046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 364.4641418457031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 361.8381652832031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 361.47100830078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 361.30145263671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 364.37890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 364.09075927734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 366.05047607421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 363.95928955078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 363.3110046386719
############ Running episode number: 237  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 362.0750732421875
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 2 19.0 1000.80824137 (11.786394321941378, 11)
loss 362.6382751464844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 364.1508483886719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 359.8352966308594
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 2 19.0 974.154538113 (11.336751742492702, 11)
loss 360.8592529296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 363.2257080078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 366.6286926269531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 367.6253356933594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 364.12408447265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 361.34930419921875
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 4 19.0 939.691239608 (10.819208572963639, 11)
loss 362.7294616699219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 360.95806884765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 363.924560546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 364.8388366699219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 363.10009765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 367.61334228515625
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 2 19.0 925.738299342 (10.553846649940214, 11)
loss 363.7543640136719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 364.1603698730469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 364.1982116699219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 365.15362548828125
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 2 19.0 919.412094444 (10.44185150623065, 11)
loss 362.5314025878906
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 4 19.0 919.871906942 (10.370942817486826, 11)
loss 363.8135681152344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 362.9679870605469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 363.3719787597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 360.39984130859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 364.5417175292969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 364.5702819824219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 369.1330871582031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 362.1830749511719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 362.39605712890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 362.94195556640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 362.6459045410156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 363.7980651855469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 363.52325439453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 369.8509521484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 364.08685302734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 360.8849182128906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 363.7855529785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 365.68255615234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 362.49462890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 362.9200744628906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 362.7344055175781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 367.2729187011719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 367.6871643066406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 363.0420837402344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 361.3609619140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 363.088623046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 364.9444580078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 366.73443603515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 362.34051513671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 363.9721374511719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 364.05657958984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 362.6450500488281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 365.4555358886719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 364.2422790527344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 363.28125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 362.3994140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 363.2760009765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 364.2045593261719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 364.7185974121094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 365.38531494140625
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 4 19.0 1301.78496219 (17.944480812078613, 11)
loss 364.2947692871094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 360.9103698730469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 361.9708557128906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 362.3150634765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 363.28826904296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 365.1282043457031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 362.41436767578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 361.560546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 361.90155029296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 362.22174072265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 364.4964599609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 365.4217224121094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 365.9200439453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 362.0025329589844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 362.089599609375
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 2 19.0 1310.13203606 (17.229782241685768, 11)
loss 362.13409423828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 364.4923400878906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 364.1768798828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 364.02972412109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 362.771484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 360.75445556640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 360.9961853027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 361.7483215332031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 364.699462890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 360.6693115234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 360.7078857421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 366.3077087402344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 363.8301696777344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 362.3216247558594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 360.6482238769531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 366.3826599121094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 363.558837890625
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 2 19.0 1221.34827555 (16.229253414601111, 11)
loss 362.47662353515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 365.80230712890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 362.6934814453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 362.6081848144531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 362.8173828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 361.96044921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 363.90740966796875
############ Running episode number: 238  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 361.4751281738281
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 0 21.0 1000.80824137 (11.786394321941378, 9)
loss 362.0245666503906
Current State,action,reward,Response time,Next State:  (9, 11.786394321941378) 3 20.0 1040.0771169 (11.61852219546234, 10)
loss 366.4002685546875
Current State,action,reward,Response time,Next State:  (10, 11.61852219546234) 3 19.0 993.95437024 (11.469111876584304, 11)
loss 363.92974853515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 363.2762145996094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 363.17230224609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 362.0040283203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 363.1683654785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 364.22943115234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 363.9495544433594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 362.7159423828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 361.71673583984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 364.2715148925781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 363.508056640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 362.9298095703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 360.29180908203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 367.4933776855469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 360.94873046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 362.29315185546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 362.31024169921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 362.4836120605469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 362.8612976074219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 363.1834411621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 363.9936828613281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 364.2284240722656
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 1 20.0 914.701547126 (10.319026962956018, 10)
loss 362.1743469238281
Current State,action,reward,Response time,Next State:  (10, 10.319026962956018) 3 19.0 925.023825574 (10.30224719189987, 11)
loss 359.8395690917969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 365.68414306640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 362.6831970214844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 361.3146667480469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 363.6503601074219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 361.9572448730469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 361.34075927734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 363.59271240234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 364.5343933105469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 362.7056884765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 362.01910400390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 362.7429504394531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 361.9206237792969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 361.5155029296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 364.6054382324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 360.2061767578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 366.41949462890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 365.9951477050781
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 0 21.0 937.284736847 (10.924797168745895, 9)
loss 361.2531433105469
Current State,action,reward,Response time,Next State:  (9, 10.924797168745895) 3 20.0 994.97675791 (11.039747673816453, 10)
loss 363.95001220703125
Action +2 not possible so Scaled up by 1
Current State,action,reward,Response time,Next State:  (10, 11.039747673816453) 4 19.0 963.253801267 (11.271571944085663, 11)
loss 366.005615234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 362.5694274902344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 361.3184814453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 361.6319274902344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 364.1594543457031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 360.4459533691406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 360.649658203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 362.9506530761719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 365.89593505859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 362.6119689941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 362.37249755859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 360.06292724609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 362.6976013183594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 363.9797668457031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 363.7964172363281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 366.1474609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 362.7942199707031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 359.668701171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 364.9326477050781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 361.5282897949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 362.4544982910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 364.37628173828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 366.61260986328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 365.3475341796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 362.7761535644531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 361.4881896972656
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 1 20.0 1385.62570908 (19.08360399753829, 10)
loss 365.4997863769531
Current State,action,reward,Response time,Next State:  (10, 19.08360399753829) 3 19.0 1389.93285614 (18.668181536495972, 11)
loss 362.2323303222656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 364.3332824707031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 362.87969970703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 362.5311279296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 365.1656188964844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 364.75567626953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 360.1822509765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 366.2091064453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 361.91522216796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 364.02044677734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 361.86181640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 362.85675048828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 361.1641540527344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 365.50848388671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 363.0226135253906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 361.6799621582031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 363.83111572265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 362.9626159667969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 365.8687438964844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 359.5517272949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 361.9302673339844
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 4 19.0 1225.69057988 (16.295120821876548, 11)
loss 360.88311767578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 360.8534240722656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 361.60784912109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 361.7294616699219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 361.66558837890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 362.4673156738281
############ Running episode number: 239  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 364.29498291015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 363.41864013671875
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 1 20.0 990.920419923 (11.61852219546234, 10)
loss 366.313232421875
Current State,action,reward,Response time,Next State:  (10, 11.61852219546234) 0 22.0 993.95437024 (11.469111876584304, 8)
loss 363.74029541015625
Current State,action,reward,Response time,Next State:  (8, 11.469111876584304) 3 21.0 1041.35337246 (11.336751742492702, 9)
loss 363.44854736328125
Current State,action,reward,Response time,Next State:  (9, 11.336751742492702) 3 20.0 1016.54054685 (11.25610796929319, 10)
loss 360.5219421386719
Current State,action,reward,Response time,Next State:  (10, 11.25610796929319) 3 19.0 974.730436685 (11.027107764209074, 11)
loss 361.4075622558594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 363.878173828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 367.8045959472656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 361.2354431152344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 362.5620422363281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 363.6809997558594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 366.8949890136719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 364.6902160644531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 363.0819396972656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 363.0036926269531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 362.11883544921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 361.19244384765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 360.6399841308594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 361.1697082519531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 361.10479736328125
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 1 20.0 919.871906942 (10.370942817486826, 10)
loss 362.4602355957031
Current State,action,reward,Response time,Next State:  (10, 10.370942817486826) 3 19.0 927.777654938 (10.42733414151318, 11)
loss 361.3872985839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 361.8921203613281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 362.6385803222656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 360.6580810546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 361.9606628417969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 361.91973876953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 360.8412780761719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 361.6608581542969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 362.1625061035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 362.7898864746094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 363.3625183105469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 362.24322509765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 361.9441833496094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 360.76519775390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 362.6653747558594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 361.2917175292969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 363.2275085449219
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 1 20.0 917.140709305 (10.425974763084863, 10)
loss 363.07672119140625
Current State,action,reward,Response time,Next State:  (10, 10.425974763084863) 3 19.0 930.696774523 (10.546025383098053, 11)
loss 360.4564208984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 361.8384094238281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 362.1481628417969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 362.2502136230469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 363.99700927734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 361.4843444824219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 364.35882568359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 364.0315246582031
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 2 19.0 984.787563682 (11.819721938468785, 11)
loss 361.5195617675781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 359.3613586425781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 363.5630798339844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 365.6645202636719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 361.6942443847656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 362.29986572265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 361.3572998046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 362.6527099609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 362.9243469238281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 364.3427734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 361.97039794921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 359.9503479003906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 363.1108703613281
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 2 19.0 1301.78496219 (17.944480812078613, 11)
loss 360.6407470703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 362.39605712890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 359.9923400878906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 360.7358703613281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 360.6535949707031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 359.7821960449219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 363.3586120605469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 364.3641052246094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 361.228759765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 364.34112548828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 362.42938232421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 362.8310241699219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 359.3236999511719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 359.65966796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 364.6593933105469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 366.07861328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 359.6327209472656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 361.53387451171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 361.8597412109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 362.4911804199219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 363.1453552246094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 359.91656494140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 362.8652038574219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 364.4307556152344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 362.32220458984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 361.4360046386719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 361.63043212890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 362.46044921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 362.4372253417969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 362.25238037109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 362.5282287597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 361.3116760253906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 361.2165832519531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 362.831787109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 360.2333984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 361.03143310546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 361.2210998535156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 361.8180236816406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 361.9799499511719
############ Running episode number: 240  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 362.4225769042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 360.63092041015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 363.3678894042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 362.864990234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 360.1190490722656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 361.1851501464844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 362.60723876953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 363.2115478515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 360.8811340332031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 362.5195007324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 361.9450988769531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 360.8510437011719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 360.1675720214844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 363.9013977050781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 362.42864990234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 361.0696105957031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 361.4144287109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 361.60205078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 360.33563232421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 362.2320861816406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 360.0723876953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 360.35546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 360.5788269042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 363.6888427734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 360.864501953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 361.8519592285156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 361.3526306152344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 362.71051025390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 361.5638732910156
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 1 20.0 910.69972028 (10.335411397720526, 10)
loss 359.3138122558594
Current State,action,reward,Response time,Next State:  (10, 10.335411397720526) 3 19.0 925.892923039 (10.305649118067803, 11)
loss 361.2650451660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 362.3887023925781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 362.9259033203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 361.97540283203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 361.23846435546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 360.4366455078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 360.5027160644531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 359.8132019042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 364.7027893066406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 361.4643249511719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 360.2032775878906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 360.3926086425781
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 2 19.0 931.154858096 (10.624473674922116, 11)
loss 360.390869140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 362.6537170410156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 361.2386169433594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 362.3360900878906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 365.5122375488281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 361.1736145019531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 361.3128967285156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 360.2746276855469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 360.1535949707031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 360.15155029296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 363.4400329589844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 361.2935791015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 363.6485900878906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 358.8349304199219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 360.5081481933594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 362.4129943847656
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 1 20.0 1238.24711194 (16.871606159345866, 10)
loss 362.3201904296875
Current State,action,reward,Response time,Next State:  (10, 16.871606159345866) 3 19.0 1272.5994393 (17.534967586021782, 11)
loss 361.7261047363281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 360.7401123046875
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 4 19.0 1301.78496219 (17.944480812078613, 11)
loss 361.982666015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 361.0677795410156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 361.8338317871094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 361.7371520996094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 359.9317321777344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 360.5667419433594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 364.1206359863281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 362.0028076171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 359.8560485839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 361.74365234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 359.9944152832031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 359.3204040527344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 361.6069030761719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 359.3749694824219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 362.546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 359.837158203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 362.3975830078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 360.4998779296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 362.39898681640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 361.21295166015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 360.2702941894531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 362.496337890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 359.8429260253906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 361.5614929199219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 361.5962829589844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 361.38470458984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 362.157958984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 360.4732360839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 360.1305236816406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 360.9178771972656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 361.4366760253906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 362.659423828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 362.0225830078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 363.16424560546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 361.38885498046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 361.0987243652344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 359.76605224609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 362.6295166015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 363.62945556640625
############ Running episode number: 241  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 360.6596374511719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 363.9941711425781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 360.2522888183594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 360.4377746582031
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 2 19.0 974.154538113 (11.336751742492702, 11)
loss 363.3515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 361.57025146484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 361.9369201660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 359.287353515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 363.10504150390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 361.836669921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 360.3495178222656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 358.6343994140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 361.7563171386719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 362.17144775390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 361.945556640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 363.96612548828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 359.75103759765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 360.130615234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 360.1132507324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 360.22308349609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 359.0608215332031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 363.6297607421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 364.15093994140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 360.7021789550781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 361.62091064453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 362.9734191894531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 362.0302734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 360.5628967285156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 363.0312194824219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 360.5294189453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 360.9046936035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 361.69775390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 361.6524658203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 363.97747802734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 359.6891784667969
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 1 20.0 909.008683798 (10.369891240151098, 10)
loss 361.2287292480469
Current State,action,reward,Response time,Next State:  (10, 10.369891240151098) 3 19.0 927.721874973 (10.316955310454549, 11)
loss 362.777587890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 361.4788818359375
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 1 20.0 914.152581784 (10.390165524255663, 10)
loss 363.6593017578125
Current State,action,reward,Response time,Next State:  (10, 10.390165524255663) 3 19.0 928.797305964 (10.425974763084863, 11)
loss 361.5301513671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 359.41033935546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 364.65423583984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 360.56817626953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 360.5628662109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 359.2257080078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 361.3507080078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 360.6298522949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 361.06195068359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 362.503662109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 360.00042724609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 362.5218200683594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 360.7564392089844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 361.5653991699219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 360.9012756347656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 360.9814453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 359.9970397949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 362.4812316894531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 361.0212707519531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 361.2124938964844
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 0 21.0 1259.63387034 (17.534967586021782, 9)
loss 360.365234375
Current State,action,reward,Response time,Next State:  (9, 17.534967586021782) 3 20.0 1340.98655806 (17.669285735563751, 10)
loss 360.69879150390625
Current State,action,reward,Response time,Next State:  (10, 17.669285735563751) 3 19.0 1314.91162813 (17.944480812078613, 11)
loss 362.1363220214844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 360.3410339355469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 362.0815734863281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 363.8005676269531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 363.1905517578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 360.35888671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 360.9795837402344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 359.494873046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 363.2373046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 359.15869140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 361.03741455078125
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 0 21.0 1385.62570908 (19.08360399753829, 9)
loss 364.438232421875
Current State,action,reward,Response time,Next State:  (9, 19.08360399753829) 1 22.0 1422.05003169 (18.668181536495972, 8)
loss 362.1998291015625
Current State,action,reward,Response time,Next State:  (8, 18.668181536495972) 3 21.0 1462.10152292 (18.375894992990247, 9)
loss 363.72149658203125
Current State,action,reward,Response time,Next State:  (9, 18.375894992990247) 3 20.0 1385.00495784 (17.82724819986867, 10)
loss 361.0497131347656
Current State,action,reward,Response time,Next State:  (10, 17.82724819986867) 3 19.0 1323.29060362 (17.229782241685768, 11)
loss 361.9223327636719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 360.5986022949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 361.1694030761719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 363.0423583984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 360.8105773925781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 361.41015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 361.068603515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 362.18646240234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 362.2257080078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 361.6433410644531
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 1 20.0 1204.59090422 (15.892373986997768, 10)
loss 361.3067321777344
Current State,action,reward,Response time,Next State:  (10, 15.892373986997768) 3 19.0 1220.65695786 (15.954793861767499, 11)
loss 360.2251281738281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 359.1187744140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 364.67584228515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 361.0067138671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 361.1535949707031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 360.4680480957031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 360.2848205566406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 359.272216796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 361.49969482421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 362.86712646484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 361.53265380859375
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 1 20.0 1258.27117243 (17.052961248403161, 10)
loss 361.3778076171875
Current State,action,reward,Response time,Next State:  (10, 17.052961248403161) 3 19.0 1282.21925533 (17.215992726625572, 11)
loss 363.4924011230469
############ Running episode number: 242  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 363.4285583496094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 363.63067626953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 361.1891784667969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 362.4312438964844
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 2 19.0 974.154538113 (11.336751742492702, 11)
loss 361.14544677734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 363.45672607421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 363.0040588378906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 361.2464904785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 362.45867919921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 360.7967224121094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 363.80621337890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 361.5811767578125
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 4 19.0 937.12351295 (10.772009508959538, 11)
loss 362.2067565917969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 363.4714050292969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 363.84112548828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 359.9635925292969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 361.14208984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 362.3187561035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 360.88238525390625
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 0 21.0 920.244245637 (10.433149880183072, 9)
loss 360.6665344238281
Current State,action,reward,Response time,Next State:  (9, 10.433149880183072) 3 20.0 969.241448633 (10.44185150623065, 10)
loss 359.6757507324219
Current State,action,reward,Response time,Next State:  (10, 10.44185150623065) 3 19.0 931.538941947 (10.370942817486826, 11)
loss 362.8201904296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 360.74560546875
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 1 20.0 919.104778396 (10.388469398680568, 10)
loss 362.649169921875
Current State,action,reward,Response time,Next State:  (10, 10.388469398680568) 3 19.0 928.707336523 (10.344006106602812, 11)
loss 363.41461181640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 362.5584411621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 363.54351806640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 362.8136901855469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 363.2639465332031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 361.96826171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 361.1802062988281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 360.63043212890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 361.49932861328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 362.2974548339844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 362.51202392578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 364.18719482421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 361.4443054199219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 360.41644287109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 361.3745422363281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 362.8202819824219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 361.0146484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 362.39849853515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 363.5226135253906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 362.93609619140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 359.51519775390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 362.0302429199219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 360.0288391113281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 359.5056457519531
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 2 19.0 984.787563682 (11.819721938468785, 11)
loss 361.4287109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 359.5649719238281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 363.21343994140625
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 2 19.0 1028.70793389 (13.168618569876575, 11)
loss 358.7753601074219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 362.19830322265625
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 1 20.0 1089.37925646 (14.283719188889453, 10)
loss 364.57275390625
Current State,action,reward,Response time,Next State:  (10, 14.283719188889453) 3 19.0 1135.32732476 (14.677479537099185, 11)
loss 361.5016174316406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 363.316162109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 361.5572814941406
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 2 19.0 1204.9600972 (16.466876895473597, 11)
loss 362.9693298339844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 363.45831298828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 363.0263671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 361.18603515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 360.4529113769531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 361.5249328613281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 359.8232116699219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 363.14935302734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 363.219482421875
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 0 21.0 1387.23260634 (19.340464848017284, 9)
loss 360.3890075683594
Current State,action,reward,Response time,Next State:  (9, 19.340464848017284) 3 20.0 1435.4954296 (19.213467265587269, 10)
loss 360.28656005859375
Current State,action,reward,Response time,Next State:  (10, 19.213467265587269) 3 19.0 1396.82133527 (19.140765783401285, 11)
loss 360.68603515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 362.2204284667969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 359.89141845703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 362.2747802734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 362.2317810058594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 363.0938415527344
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 1 20.0 1354.56874896 (18.375894992990247, 10)
loss 364.27435302734375
Current State,action,reward,Response time,Next State:  (10, 18.375894992990247) 3 19.0 1352.39307459 (17.82724819986867, 11)
loss 360.2640686035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 361.7059020996094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 361.7554626464844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 360.8291015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 362.50848388671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 362.4857177734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 360.68255615234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 362.1676330566406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 363.6722106933594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 361.6661376953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 361.6551818847656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 360.5382995605469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 359.572998046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 360.2410583496094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 361.51727294921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 360.09368896484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 361.5346374511719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 361.1851806640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 360.2933654785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 361.34625244140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 361.46966552734375
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 0 21.0 1248.87152463 (16.836383524612351, 9)
loss 360.41778564453125
Current State,action,reward,Response time,Next State:  (9, 16.836383524612351) 3 20.0 1304.41912996 (16.845818065953559, 10)
loss 361.3117980957031
Current State,action,reward,Response time,Next State:  (10, 16.845818065953559) 3 19.0 1271.23153331 (17.052961248403161, 11)
loss 363.4154357910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 360.64227294921875
############ Running episode number: 243  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 361.6427917480469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 360.854736328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 362.8947448730469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 361.9765319824219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 362.4277038574219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 362.0016784667969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 362.4147033691406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 362.83172607421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 363.4039611816406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 361.07525634765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 360.1357116699219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 361.4645080566406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 363.7529296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 362.79498291015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 360.4310607910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 363.724853515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 365.6511535644531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 361.19647216796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 360.0641174316406
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 4 19.0 920.244245637 (10.433149880183072, 11)
loss 362.82470703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 360.40478515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 361.48211669921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 364.4216613769531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 363.8217468261719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 362.9015808105469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 362.99658203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 363.0317687988281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 362.9187927246094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 360.8648681640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 361.9228515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 360.8326416015625
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 2 19.0 912.67468196 (10.24826025489064, 11)
loss 362.9026184082031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 362.57928466796875
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 4 19.0 911.133954163 (10.236991269871366, 11)
loss 362.18585205078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 360.88287353515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 362.8797302246094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 362.2891540527344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 361.5625305175781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 359.3904113769531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 360.9534606933594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 360.8848876953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 359.9261169433594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 360.8385314941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 359.6563415527344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 363.1695861816406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 361.7951965332031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 360.2027587890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 360.7789611816406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 363.5149841308594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 365.0676574707031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 362.7880859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 362.3746032714844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 363.0254821777344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 360.2681884765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 360.458984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 362.065185546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 360.0069274902344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 359.4590759277344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 362.51812744140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 360.0312194824219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 362.5827941894531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 361.62322998046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 362.0364074707031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 360.9528503417969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 361.0531005859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 360.0285339355469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 361.27801513671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 361.1203308105469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 361.5805358886719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 362.9444274902344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 364.0867614746094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 363.13787841796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 359.4018859863281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 361.2223205566406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 363.33892822265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 363.9510803222656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 363.93267822265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 360.5708923339844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 362.7002258300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 360.0869445800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 361.6639099121094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 362.5862121582031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 361.0971984863281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 361.46807861328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 358.65692138671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 359.80615234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 363.535400390625
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 2 19.0 1207.88915169 (15.954793861767499, 11)
loss 360.19036865234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 362.7035217285156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 360.5580139160156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 362.4537353515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 363.6200256347656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 359.9497375488281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 359.5964050292969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 361.0749206542969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 362.020751953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 360.2337341308594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 360.8463134765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 359.96417236328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 362.9404602050781
############ Running episode number: 244  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 359.8360290527344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 363.47845458984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 360.31573486328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 359.2979431152344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 359.5415344238281
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 4 19.0 967.160346038 (11.25610796929319, 11)
loss 360.7068786621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 362.79364013671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 360.103515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 360.721923828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 359.2528991699219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 362.16534423828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 361.69207763671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 362.1199951171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 362.4070739746094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 361.6012878417969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 365.2074279785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 361.49468994140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 361.333740234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 363.2212829589844
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 2 19.0 920.244245637 (10.433149880183072, 11)
loss 360.3756103515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 362.4877014160156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 362.4542541503906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 362.9236145019531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 363.39727783203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 362.00787353515625
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 1 20.0 914.701547126 (10.319026962956018, 10)
loss 362.91778564453125
Current State,action,reward,Response time,Next State:  (10, 10.319026962956018) 3 19.0 925.023825574 (10.30224719189987, 11)
loss 363.6012878417969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 361.3531799316406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 360.8558654785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 362.88104248046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 359.6012268066406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 360.60809326171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 360.0361328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 362.2689208984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 360.56060791015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 361.0970458984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 361.85736083984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 362.0945129394531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 362.3348388671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 360.4151306152344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 361.48040771484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 361.4237060546875
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 0 21.0 931.154858096 (10.624473674922116, 9)
loss 361.119873046875
Current State,action,reward,Response time,Next State:  (9, 10.624473674922116) 3 20.0 979.256305105 (10.771376986314287, 10)
loss 361.699951171875
Current State,action,reward,Response time,Next State:  (10, 10.771376986314287) 3 19.0 949.018321829 (10.924797168745895, 11)
loss 362.3227844238281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 362.16436767578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 363.04400634765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 362.56817626953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 361.6416015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 359.96478271484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 360.13323974609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 364.6857604980469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 363.6887512207031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 362.4802551269531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 360.45916748046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 360.2090148925781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 361.914794921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 360.3657531738281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 361.3202819824219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 360.1777038574219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 362.927978515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 361.3256530761719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 359.3811340332031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 362.7903747558594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 359.2712097167969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 363.2867736816406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 362.8879089355469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 361.00146484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 361.0001525878906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 362.5696105957031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 359.99322509765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 359.61834716796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 364.6994934082031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 362.7490539550781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 361.81842041015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 361.28515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 362.321044921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 362.5039367675781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 359.7401428222656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 360.9973449707031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 358.96466064453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 360.2088623046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 362.6662292480469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 362.22528076171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 360.67578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 361.6083068847656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 362.7357177734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 361.1935119628906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 361.5372619628906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 360.2276916503906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 360.7133483886719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 360.3730163574219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 361.315673828125
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 1 20.0 1221.34827555 (16.229253414601111, 10)
loss 362.4325866699219
Current State,action,reward,Response time,Next State:  (10, 16.229253414601111) 3 19.0 1238.52642122 (16.295120821876548, 11)
loss 362.3398132324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 361.55889892578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 360.0978088378906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 361.1246643066406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 362.2682800292969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 361.8796081542969
############ Running episode number: 245  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 361.1549072265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 360.31402587890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 361.6248779296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 361.9693908691406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 360.3591003417969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 361.4836730957031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 362.7621154785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 360.4658203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 362.4456787109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 360.52716064453125
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 2 19.0 939.691239608 (10.819208572963639, 11)
loss 363.4940490722656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 359.64752197265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 360.45330810546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 362.49652099609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 363.06500244140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 359.9779968261719
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 2 19.0 925.738299342 (10.553846649940214, 11)
loss 362.5835876464844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 359.23126220703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 363.370849609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 363.42938232421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 360.0322570800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 362.78076171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 362.761474609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 361.80682373046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 360.7882385253906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 360.1853332519531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 360.9635925292969
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 2 19.0 912.494916918 (10.278181486298042, 11)
loss 360.1495056152344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 364.1169128417969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 363.3851013183594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 360.14617919921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 362.8485412597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 363.4263000488281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 360.5537109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 360.58416748046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 359.4647521972656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 359.9044189453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 363.07598876953125
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 1 20.0 914.152581784 (10.390165524255663, 10)
loss 360.595947265625
Current State,action,reward,Response time,Next State:  (10, 10.390165524255663) 3 19.0 928.797305964 (10.425974763084863, 11)
loss 359.7312316894531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 359.1316223144531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 362.101318359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 361.4413146972656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 363.7472229003906
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 0 21.0 937.284736847 (10.924797168745895, 9)
loss 362.0271911621094
Current State,action,reward,Response time,Next State:  (9, 10.924797168745895) 3 20.0 994.97675791 (11.039747673816453, 10)
loss 358.8149108886719
Current State,action,reward,Response time,Next State:  (10, 11.039747673816453) 3 19.0 963.253801267 (11.271571944085663, 11)
loss 362.58966064453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 363.1371154785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 361.6676025390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 361.4507141113281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 361.8057556152344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 362.12896728515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 364.90179443359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 360.76861572265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 361.5280456542969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 361.255126953125
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 0 21.0 1179.43847566 (15.836943704090487, 9)
loss 360.8577880859375
Current State,action,reward,Response time,Next State:  (9, 15.836943704090487) 3 20.0 1252.10338759 (16.466876895473597, 10)
loss 359.3541259765625
Current State,action,reward,Response time,Next State:  (10, 16.466876895473597) 3 19.0 1251.130943 (16.871606159345866, 11)
loss 361.33990478515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 361.6980285644531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 364.6648254394531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 360.01556396484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 360.1631774902344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 362.1878967285156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 360.64862060546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 364.6296691894531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 362.619384765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 361.9662170410156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 361.454833984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 363.1227111816406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 360.9821472167969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 360.8348388671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 361.4356384277344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 360.7333679199219
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 4 19.0 1354.56874896 (18.375894992990247, 11)
loss 361.3633728027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 360.2059631347656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 360.3375244140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 360.5250244140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 360.24053955078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 362.48077392578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 361.1929016113281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 360.5469055175781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 363.89434814453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 362.8751220703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 361.3470153808594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 359.9490051269531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 360.357666015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.8268737792969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 360.6295166015625
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 4 19.0 1213.81868812 (16.017694914042416, 11)
loss 360.0830993652344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 362.30914306640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 362.1730041503906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 360.62322998046875
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 1 20.0 1221.34827555 (16.229253414601111, 10)
loss 361.6238708496094
Current State,action,reward,Response time,Next State:  (10, 16.229253414601111) 3 19.0 1238.52642122 (16.295120821876548, 11)
loss 360.84661865234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 363.7333068847656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 363.62445068359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 363.9771423339844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 362.5888671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 363.43707275390625
############ Running episode number: 246  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 362.41693115234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 359.9921875
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 4 19.0 990.920419923 (11.61852219546234, 11)
loss 360.8018798828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 359.6110534667969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 361.0348205566406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 361.0807800292969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 359.7315673828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 361.6853332519531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 360.5857238769531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 360.4256286621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 363.9718933105469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 360.8648681640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 360.3633117675781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 359.9930725097656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 360.375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 361.5989074707031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 360.07147216796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 361.63641357421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 360.1732482910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 361.7247314453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 364.11358642578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 360.8485412597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 361.48675537109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 359.4049377441406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 360.72491455078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 361.6332092285156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 361.17645263671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 359.3677978515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 361.3949279785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 360.589599609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 362.1322021484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 362.4364318847656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 361.50921630859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 363.1581726074219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 361.0322265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 362.3426818847656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 359.1114501953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 361.7991027832031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 360.8587341308594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 361.1685791015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 361.1576232910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 361.2524108886719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 360.89190673828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 360.6424255371094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 359.77886962890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 360.65240478515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 359.53265380859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 364.0957336425781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 359.86187744140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 360.64910888671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 361.19500732421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 359.9085388183594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 360.3529968261719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 360.29901123046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 360.3020935058594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 361.4295349121094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 361.001220703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 360.1143493652344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 359.35565185546875
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 0 21.0 1259.63387034 (17.534967586021782, 9)
loss 360.27020263671875
Current State,action,reward,Response time,Next State:  (9, 17.534967586021782) 3 20.0 1340.98655806 (17.669285735563751, 10)
loss 359.775634765625
Current State,action,reward,Response time,Next State:  (10, 17.669285735563751) 3 19.0 1314.91162813 (17.944480812078613, 11)
loss 360.9254150390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 362.5231628417969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 360.822998046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 360.6011047363281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 360.0489196777344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 365.551025390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 360.58392333984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 361.603515625
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 1 20.0 1379.54110953 (19.385636054792762, 10)
loss 360.7165832519531
Current State,action,reward,Response time,Next State:  (10, 19.385636054792762) 0 22.0 1405.95387237 (19.223969507401588, 8)
loss 359.5090637207031
Current State,action,reward,Response time,Next State:  (8, 19.223969507401588) 3 21.0 1494.58443695 (19.25591252280865, 9)
loss 361.1114196777344
Current State,action,reward,Response time,Next State:  (9, 19.25591252280865) 3 20.0 1431.06953264 (19.08360399753829, 10)
loss 361.2898254394531
Current State,action,reward,Response time,Next State:  (10, 19.08360399753829) 3 19.0 1389.93285614 (18.668181536495972, 11)
loss 360.6590881347656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 361.61944580078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 361.6920166015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 360.60833740234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 361.0843200683594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 361.6317138671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 362.1110534667969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 362.3706359863281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 361.5904235839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 360.9286193847656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 361.54022216796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 360.0379943847656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 362.6288146972656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 360.2637634277344
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 2 19.0 1207.88915169 (15.954793861767499, 11)
loss 363.4959411621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 362.2141418457031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 360.0861511230469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 362.267822265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 360.7962341308594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 360.2014465332031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 359.45501708984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 362.4701232910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 362.9841003417969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 360.36669921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 360.26519775390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 359.5912780761719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 363.6051330566406
############ Running episode number: 247  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 361.63140869140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 361.7358093261719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 361.9790344238281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 360.2979736328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 360.78692626953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 360.3497314453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 361.5917663574219
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 1 20.0 950.798097136 (10.995673623987257, 10)
loss 363.8852844238281
Current State,action,reward,Response time,Next State:  (10, 10.995673623987257) 3 19.0 960.915933313 (10.931193889570471, 11)
loss 361.31280517578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 361.395751953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 360.7319030761719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 361.1165771484375
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 4 19.0 937.12351295 (10.772009508959538, 11)
loss 359.13995361328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 362.8483581542969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 361.1924743652344
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 1 20.0 927.560809977 (10.552868829802469, 10)
loss 361.96954345703125
Current State,action,reward,Response time,Next State:  (10, 10.552868829802469) 3 19.0 937.427755072 (10.553846649940214, 11)
loss 361.0448303222656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 361.9822082519531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 363.9510803222656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 359.3428039550781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 361.04486083984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 360.2279052734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 361.9175109863281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 360.2069091796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 362.3685607910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 361.5297546386719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 362.3283386230469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 360.96624755859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 360.0235595703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 361.0833740234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 364.0401611328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 363.0033264160156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 360.1988830566406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 362.859619140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 362.06927490234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 361.51904296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 360.59600830078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 362.59393310546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 363.0260009765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 362.5635986328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 361.85052490234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 361.6092529296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 360.6480407714844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 361.7056579589844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 361.04632568359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 363.3025817871094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 360.07073974609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 363.2605895996094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 361.00341796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 360.8121337890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 360.4019470214844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 362.1547546386719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 360.19244384765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 361.0635681152344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 360.4988098144531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 362.6766662597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 361.0875549316406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 362.1304016113281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 363.52044677734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 362.85308837890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 360.2095031738281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 363.0998840332031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 362.1497802734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 360.61993408203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 360.61370849609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 362.0626220703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 361.88397216796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 360.8028259277344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 362.05255126953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 362.3114929199219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 362.2013854980469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 360.3832092285156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 361.2356872558594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 362.436767578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 362.99859619140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 362.7449035644531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 360.6585693359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 360.5070495605469
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 2 19.0 1258.07554888 (16.237094554670044, 11)
loss 363.7039489746094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 360.00311279296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 359.2965087890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 361.2035827636719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 360.4658203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 360.28900146484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 360.60888671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 360.85198974609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 362.2351379394531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 362.2710266113281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 362.1333312988281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 360.5711364746094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 362.2922668457031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 360.67926025390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 362.4146423339844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 361.7621154785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 360.7154541015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 361.66168212890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 359.5572509765625
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 0 21.0 1257.77263112 (16.845818065953559, 9)
loss 360.8004150390625
Current State,action,reward,Response time,Next State:  (9, 16.845818065953559) 3 20.0 1304.91298164 (17.052961248403161, 10)
loss 363.04376220703125
Current State,action,reward,Response time,Next State:  (10, 17.052961248403161) 3 19.0 1282.21925533 (17.215992726625572, 11)
loss 363.8295593261719
############ Running episode number: 248  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 362.27447509765625
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 1 20.0 1000.80824137 (11.786394321941378, 10)
loss 360.6502380371094
Action +2 not possible so Scaled up by 1
Current State,action,reward,Response time,Next State:  (10, 11.786394321941378) 4 19.0 1002.85899476 (11.61852219546234, 11)
loss 361.82696533203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 360.1931457519531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 361.689208984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 360.2457580566406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 359.5980224609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 361.6198425292969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 360.236083984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 361.9887390136719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 362.6412658691406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 360.7372741699219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 362.0404968261719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 360.47454833984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 360.5016174316406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 359.77459716796875
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 4 19.0 925.738299342 (10.553846649940214, 11)
loss 359.74810791015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 361.5823669433594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 360.4456787109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 360.301025390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 360.9496154785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 363.7049560546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 361.1708679199219
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 1 20.0 919.104778396 (10.388469398680568, 10)
loss 360.5791015625
Action +2 not possible so Scaled up by 1
Current State,action,reward,Response time,Next State:  (10, 10.388469398680568) 4 19.0 928.707336523 (10.344006106602812, 11)
loss 359.9527587890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 361.97467041015625
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 0 21.0 913.381595845 (10.30224719189987, 9)
loss 361.33935546875
Current State,action,reward,Response time,Next State:  (9, 10.30224719189987) 3 20.0 962.389338906 (10.278181486298042, 10)
loss 364.0158386230469
Current State,action,reward,Response time,Next State:  (10, 10.278181486298042) 3 19.0 922.857214352 (10.268274366284802, 11)
loss 361.6451416015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 363.27862548828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 361.550537109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 361.91943359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 359.9997863769531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 362.47015380859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 363.0687255859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 362.6343078613281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 361.29931640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 365.3080749511719
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 0 21.0 914.152581784 (10.390165524255663, 9)
loss 362.9161071777344
Current State,action,reward,Response time,Next State:  (9, 10.390165524255663) 3 20.0 966.991429728 (10.425974763084863, 10)
loss 361.3709716796875
Current State,action,reward,Response time,Next State:  (10, 10.425974763084863) 3 19.0 930.696774523 (10.546025383098053, 11)
loss 361.5907897949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 362.79730224609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 362.6670227050781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 361.12139892578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 359.6808776855469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 361.1668701171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 362.1720886230469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 361.7215576171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 360.6925964355469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 360.3454284667969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 359.2834167480469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 362.168701171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 362.53076171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 361.8357849121094
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 0 21.0 1122.88439768 (14.677479537099185, 9)
loss 360.37554931640625
Current State,action,reward,Response time,Next State:  (9, 14.677479537099185) 3 20.0 1191.41116041 (15.353965082180355, 10)
loss 361.8679504394531
Current State,action,reward,Response time,Next State:  (10, 15.353965082180355) 3 19.0 1192.09754638 (15.836943704090487, 11)
loss 361.9647216796875
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 1 20.0 1204.9600972 (16.466876895473597, 10)
loss 361.42315673828125
Current State,action,reward,Response time,Next State:  (10, 16.466876895473597) 3 19.0 1251.130943 (16.871606159345866, 11)
loss 359.8131408691406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 361.5611877441406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 361.5740051269531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 361.6683349609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 360.928466796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 360.44696044921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 362.3794860839844
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 2 19.0 1373.60319427 (19.286321916040979, 11)
loss 364.1748046875
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 1 20.0 1387.23260634 (19.340464848017284, 10)
loss 363.3055725097656
Current State,action,reward,Response time,Next State:  (10, 19.340464848017284) 3 19.0 1403.55780672 (19.213467265587269, 11)
loss 363.6039123535156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 361.4979553222656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 363.96051025390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 363.181396484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 361.8616027832031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 363.8560791015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 360.9488525390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 362.333740234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 361.75
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 361.3362731933594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 365.3204040527344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 361.1849670410156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 363.1121520996094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 360.9501953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 362.1267395019531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 361.5726318359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 361.4618835449219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 360.07537841796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 361.864501953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 362.2784423828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 362.9304504394531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 360.3768615722656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 363.8805847167969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 361.173095703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 362.83782958984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 361.94305419921875
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 2 19.0 1221.34827555 (16.229253414601111, 11)
loss 360.9666748046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 358.6111145019531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 363.5836486816406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 361.5441589355469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 361.65411376953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 362.673095703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 360.45574951171875
############ Running episode number: 249  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 363.8289794921875
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 4 19.0 1000.80824137 (11.786394321941378, 11)
loss 360.9918212890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 360.4593811035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 363.6952819824219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 362.18670654296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 359.8519287109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 360.5133056640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 360.9115295410156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 361.04913330078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 361.5738220214844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 361.2254333496094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 360.4264831542969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 361.9200439453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 362.4554748535156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 362.2243957519531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 362.554931640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 363.7652893066406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 361.51214599609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 359.8849182128906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 363.0206298828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 360.84246826171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 362.0991516113281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 360.0794677734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 361.93316650390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 360.424072265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 362.951416015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 360.469482421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 361.187255859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 361.57501220703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 363.4707336425781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 363.15228271484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 359.80865478515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 362.55865478515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 361.4181213378906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 362.5860900878906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 361.8468933105469
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 4 19.0 916.069372847 (10.316955310454549, 11)
loss 360.5079040527344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 360.1956481933594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 362.423828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 360.65948486328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 360.8212890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 361.1493835449219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 360.4206237792969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 361.2286682128906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 363.4045715332031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 360.4972839355469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 362.1940612792969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 360.0631103515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 359.7051086425781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 361.2215576171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 362.924072265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 363.29510498046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 362.3328552246094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 361.5126037597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 361.69696044921875
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 4 19.0 1143.69153516 (15.353965082180355, 11)
loss 361.4544982910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 362.32794189453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 360.64007568359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 361.44287109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 359.8861389160156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 363.6806335449219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 363.9794921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 362.2456970214844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 360.7420349121094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 363.3341369628906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 361.22265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 360.6884765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 360.5374450683594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 360.7218933105469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 360.5545349121094
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 0 21.0 1392.48057747 (19.223969507401588, 9)
loss 362.7782897949219
Current State,action,reward,Response time,Next State:  (9, 19.223969507401588) 3 20.0 1429.39747342 (19.25591252280865, 10)
loss 360.2655944824219
Current State,action,reward,Response time,Next State:  (10, 19.25591252280865) 3 19.0 1399.0728054 (19.08360399753829, 11)
loss 360.8238220214844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 362.6236267089844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 362.8904724121094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 359.6072998046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 360.9953308105469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 361.38201904296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 360.6688232421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 361.9794616699219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 360.7130126953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 361.04229736328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 360.7115783691406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 361.4857482910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 361.2696533203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 360.1153564453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 361.3237609863281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 363.29425048828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 360.9129333496094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 362.1564636230469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 362.6256103515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 362.5444030761719
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 2 19.0 1219.63501821 (16.147078378791146, 11)
loss 362.6680908203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 359.69757080078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 361.70343017578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 361.71478271484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 359.4444274902344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 360.7489929199219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 362.9219665527344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 363.3503723144531
############ Running episode number: 250  ##############
Action +2 not possible so Scaled up by 1
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 4 19.0 1029.06659875 (11.973514343585284, 11)
loss 362.0181884765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 362.7872009277344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 360.4654846191406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 360.72308349609375
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 1 20.0 974.154538113 (11.336751742492702, 10)
loss 362.2818298339844
Action +2 not possible so Scaled up by 1
Current State,action,reward,Response time,Next State:  (10, 11.336751742492702) 4 19.0 979.00811241 (11.25610796929319, 11)
loss 360.7526550292969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 361.275390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 361.6739196777344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 360.26092529296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 360.5352478027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 362.6615295410156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 362.16864013671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 360.5222473144531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 363.015380859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 360.6962890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 359.93243408203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 360.50634765625
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 1 20.0 925.789969445 (10.489125480251131, 10)
loss 363.3670654296875
Current State,action,reward,Response time,Next State:  (10, 10.489125480251131) 3 19.0 934.046546974 (10.448897752470936, 11)
loss 362.6606750488281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 359.1649169921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 361.3920593261719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 360.71832275390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 360.1968078613281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 363.742431640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 362.3694152832031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 365.03924560546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 360.63885498046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 361.7613525390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 362.323974609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 362.7119140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 360.729248046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 359.7779846191406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 362.8713684082031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 361.7159729003906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 360.4453430175781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 359.85089111328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 361.6399841308594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 361.9569091796875
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 2 19.0 914.152581784 (10.390165524255663, 11)
loss 362.9300231933594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 362.01763916015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 360.3227233886719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 362.2180480957031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 359.5855407714844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 360.8413391113281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 364.0437927246094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 361.7704772949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 362.844970703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 360.9578552246094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 362.82550048828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 360.1112060546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 363.1883850097656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 359.5659484863281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 362.21636962890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 361.558837890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 362.04852294921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 359.055419921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 361.7926025390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 361.6320495605469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 362.517333984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 359.0806884765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 362.1996154785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 360.42681884765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 362.7956848144531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 360.84771728515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 360.6395263671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 362.6514587402344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 360.2572326660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 360.7843322753906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 360.6760559082031
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 1 20.0 1379.54110953 (19.385636054792762, 10)
loss 361.08172607421875
Current State,action,reward,Response time,Next State:  (10, 19.385636054792762) 3 19.0 1405.95387237 (19.223969507401588, 11)
loss 361.15338134765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 362.77392578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 361.7624206542969
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 0 21.0 1376.52055872 (18.668181536495972, 9)
loss 361.80889892578125
Current State,action,reward,Response time,Next State:  (9, 18.668181536495972) 3 20.0 1400.30471596 (18.375894992990247, 10)
loss 362.3763732910156
Current State,action,reward,Response time,Next State:  (10, 18.375894992990247) 3 19.0 1352.39307459 (17.82724819986867, 11)
loss 360.5781555175781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 361.1769714355469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 364.268310546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 361.84613037109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 362.53179931640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 359.83160400390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 362.3738098144531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 361.0581359863281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 361.28326416015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 361.2973937988281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.8725891113281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 361.4374694824219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 361.609619140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 360.81353759765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 360.2530212402344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 359.86688232421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 359.6941833496094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 361.865234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 360.1735534667969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 360.62567138671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 361.1968688964844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 360.60552978515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 361.8062744140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 361.65423583984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 360.2520446777344
############ Running episode number: 251  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 359.6757507324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 360.2445373535156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 362.9759521484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 362.33892822265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 362.2411804199219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 363.5496826171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 359.4644470214844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 361.20819091796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 364.0233154296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 363.3245544433594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 363.6238098144531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 360.2608947753906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 362.9420471191406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 362.70599365234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 362.77001953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 360.9952392578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 362.2885437011719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 362.3153076171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 361.8581237792969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 361.5105285644531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 363.61236572265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 360.87860107421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 360.5474853515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 362.99200439453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 360.6343994140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 364.951171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 360.15875244140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 359.68585205078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 361.30255126953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 360.4044189453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 360.5509033203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 363.0190734863281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 359.85498046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 363.2120056152344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 360.7096252441406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 364.4600830078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 361.7862548828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 362.0157775878906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 360.7133483886719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 361.33343505859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 361.3279113769531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 360.08502197265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 364.1726379394531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 362.81927490234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 361.2689514160156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 363.5954895019531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 361.2021484375
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 4 19.0 963.716106332 (11.670334358779868, 11)
loss 359.7684631347656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 363.8592529296875
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 0 21.0 992.681522335 (12.19918626616789, 9)
loss 362.1925354003906
Current State,action,reward,Response time,Next State:  (9, 12.19918626616789) 3 20.0 1061.68473805 (12.501496275411796, 10)
loss 361.1914367675781
Current State,action,reward,Response time,Next State:  (10, 12.501496275411796) 3 19.0 1040.79092857 (13.168618569876575, 11)
loss 360.4339599609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 361.817138671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 360.7239990234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 364.0126037597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 364.9108581542969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 362.3982849121094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 362.0377197265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 361.8079833984375
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 1 20.0 1259.63387034 (17.534967586021782, 10)
loss 361.2109375
Current State,action,reward,Response time,Next State:  (10, 17.534967586021782) 3 19.0 1307.78684385 (17.669285735563751, 11)
loss 360.2762451171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 363.6284484863281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 360.8877258300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 361.81048583984375
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 1 20.0 1354.73183582 (19.02839494033929, 10)
loss 361.6593322753906
Current State,action,reward,Response time,Next State:  (10, 19.02839494033929) 3 19.0 1387.00434183 (19.286321916040979, 11)
loss 363.16888427734375
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 0 21.0 1387.23260634 (19.340464848017284, 9)
loss 362.1879577636719
Current State,action,reward,Response time,Next State:  (9, 19.340464848017284) 3 20.0 1435.4954296 (19.213467265587269, 10)
loss 361.2236022949219
Current State,action,reward,Response time,Next State:  (10, 19.213467265587269) 3 19.0 1396.82133527 (19.140765783401285, 11)
loss 360.6698303222656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 360.5433044433594
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 1 20.0 1392.48057747 (19.223969507401588, 10)
loss 363.0771789550781
Current State,action,reward,Response time,Next State:  (10, 19.223969507401588) 3 19.0 1397.37841716 (19.25591252280865, 11)
loss 360.68585205078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 360.2865295410156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 361.03875732421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 360.94586181640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 361.1120910644531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 359.7020568847656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 361.7817077636719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 360.9898376464844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 360.7463684082031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 361.09259033203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 360.2835998535156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 363.5155944824219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 362.72332763671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 360.04058837890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 363.5487060546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 360.34552001953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 362.3212890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 362.3238525390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 359.3048095703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 362.0320129394531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 359.7711181640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 360.3680114746094
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 2 19.0 1221.34827555 (16.229253414601111, 11)
loss 361.032470703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 362.851806640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 361.9713439941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 360.45904541015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 360.2033996582031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 362.2564697265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 362.3299560546875
############ Running episode number: 252  ##############
Action +2 not possible so Scaled up by 1
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 4 19.0 1029.06659875 (11.973514343585284, 11)
loss 362.7823486328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 360.64434814453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 361.6773986816406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 362.28033447265625
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 0 21.0 974.154538113 (11.336751742492702, 9)
loss 361.0645751953125
Current State,action,reward,Response time,Next State:  (9, 11.336751742492702) 3 20.0 1016.54054685 (11.25610796929319, 10)
loss 360.3085021972656
Current State,action,reward,Response time,Next State:  (10, 11.25610796929319) 3 19.0 974.730436685 (11.027107764209074, 11)
loss 359.9899597167969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 363.8088684082031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 363.02374267578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 361.3372802734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 361.32476806640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 362.7295837402344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 363.37847900390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 363.11260986328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 360.4683837890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 362.8006591796875
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 1 20.0 925.738299342 (10.553846649940214, 10)
loss 362.3935852050781
Current State,action,reward,Response time,Next State:  (10, 10.553846649940214) 3 19.0 937.479622653 (10.489125480251131, 11)
loss 361.04534912109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 363.85015869140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 360.7166442871094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 360.9117126464844
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 2 19.0 919.871906942 (10.370942817486826, 11)
loss 361.05609130859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 362.5305480957031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 361.1344299316406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 359.4945373535156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 361.63262939453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 360.5901794433594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 361.7359924316406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 359.9718933105469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 361.4390869140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 360.23297119140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 362.4709167480469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 360.9898376464844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 361.08319091796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 360.99493408203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 360.2724609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 363.20977783203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 364.31842041015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 362.2116394042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 359.8263244628906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 359.2845764160156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 362.40576171875
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 4 19.0 931.154858096 (10.624473674922116, 11)
loss 359.4121398925781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 363.8952941894531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 361.8831481933594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 360.68426513671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 362.7349853515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 362.73822021484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 359.80108642578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 362.0137939453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 360.40625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 359.7783508300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 360.3560791015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 359.58551025390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 359.7566223144531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 361.784423828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 361.236572265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 362.7315673828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 360.3507385253906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 360.18487548828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 361.35821533203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 360.9158935546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 361.9440002441406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 360.8556823730469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 361.3100891113281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 362.2542419433594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 361.7931213378906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 361.4149169921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 360.8572082519531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 363.4323425292969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 362.53094482421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 361.14141845703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 361.9115905761719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 359.9365539550781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 361.9443054199219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 360.7359619140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 362.0999450683594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 361.9406433105469
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 2 19.0 1258.07554888 (16.237094554670044, 11)
loss 360.71331787109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 360.4651184082031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 362.0919189453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 361.6065673828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 361.25372314453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 359.81793212890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 359.21710205078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 361.14599609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 362.1385192871094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 362.0765686035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 362.1857604980469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 360.3423767089844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 362.1516418457031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 359.6185302734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 361.01654052734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 361.5260009765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 362.6722412109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 360.65496826171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 361.24786376953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 361.2885437011719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 361.7005615234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 362.0611572265625
############ Running episode number: 253  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 362.6174011230469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 362.4813537597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 362.1206359863281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 362.49505615234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 360.3055419921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 361.161376953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 361.3253173828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 362.50634765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 361.9425964355469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 362.94720458984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 360.83148193359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 364.08038330078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 360.0931091308594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 362.5304870605469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 361.8478698730469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 359.1570129394531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 361.2649841308594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 360.8282470703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 360.9967346191406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 360.7791748046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 361.3884582519531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 360.1620178222656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 359.15057373046875
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 1 20.0 919.104778396 (10.388469398680568, 10)
loss 361.46270751953125
Current State,action,reward,Response time,Next State:  (10, 10.388469398680568) 3 19.0 928.707336523 (10.344006106602812, 11)
loss 360.8504943847656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 361.7746276855469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 361.2983093261719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 360.6474609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 364.2801208496094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 360.52618408203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 361.4363708496094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 360.01495361328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 362.8077392578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 360.9340515136719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 359.166015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 359.4161071777344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 361.1683349609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 361.9605712890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 361.6537780761719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 361.4100341796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 361.46600341796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 360.90087890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 359.9974365234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 361.4767150878906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 361.101806640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 360.036865234375
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 0 21.0 951.466016946 (11.271571944085663, 9)
loss 361.98016357421875
Current State,action,reward,Response time,Next State:  (9, 11.271571944085663) 3 20.0 1013.12870607 (11.670334358779868, 10)
loss 362.6686096191406
Current State,action,reward,Response time,Next State:  (10, 11.670334358779868) 3 19.0 996.702699398 (11.819721938468785, 11)
loss 359.66693115234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 361.2441711425781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 361.86712646484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 363.68988037109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 360.0192565917969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 362.06903076171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 362.0577697753906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 361.7424011230469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 360.614013671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 360.0866394042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 360.1563415527344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 361.786865234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 362.66473388671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 360.2861328125
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 4 19.0 1316.32685758 (18.385807405229915, 11)
loss 360.55364990234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 360.0041198730469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 362.6922912597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 360.754638671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 359.77020263671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 362.30975341796875
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 0 21.0 1383.38281107 (19.140765783401285, 9)
loss 361.34112548828125
Current State,action,reward,Response time,Next State:  (9, 19.140765783401285) 3 20.0 1425.04216908 (19.385636054792762, 10)
loss 360.00701904296875
Current State,action,reward,Response time,Next State:  (10, 19.385636054792762) 3 19.0 1405.95387237 (19.223969507401588, 11)
loss 361.4676818847656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 363.9022216796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 363.47283935546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 361.11590576171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 363.032470703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 359.5548095703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 359.61260986328125
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 4 19.0 1278.56065924 (16.84211602880065, 11)
loss 360.5102844238281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 361.82757568359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 361.3648986816406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 361.584716796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 361.9501647949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 361.1185302734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 361.74676513671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 361.3087463378906
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 4 19.0 1203.91462651 (15.829956988360925, 11)
loss 362.6268310546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 360.9718933105469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 362.68426513671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 360.4231262207031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 365.46575927734375
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 0 21.0 1214.51137704 (15.947547279389703, 9)
loss 360.1632995605469
Current State,action,reward,Response time,Next State:  (9, 15.947547279389703) 3 20.0 1257.89293893 (16.11465619633363, 10)
loss 360.80328369140625
Current State,action,reward,Response time,Next State:  (10, 16.11465619633363) 3 19.0 1232.44771583 (16.147078378791146, 11)
loss 361.0400390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 361.5968017578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 361.3736572265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 359.3804626464844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 362.1780090332031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 361.088134765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 362.5553283691406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 361.8524169921875
############ Running episode number: 254  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 1 21.0 1029.06659875 (11.973514343585284, 9)
loss 364.861083984375
Current State,action,reward,Response time,Next State:  (9, 11.973514343585284) 3 20.0 1049.87192659 (11.786394321941378, 10)
loss 362.3200378417969
Action +2 not possible so Scaled up by 1
Current State,action,reward,Response time,Next State:  (10, 11.786394321941378) 4 19.0 1002.85899476 (11.61852219546234, 11)
loss 362.7065734863281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 362.76885986328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 361.8719482421875
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 0 21.0 967.160346038 (11.25610796929319, 9)
loss 360.2015380859375
Current State,action,reward,Response time,Next State:  (9, 11.25610796929319) 3 20.0 1012.3192433 (11.027107764209074, 10)
loss 361.626708984375
Current State,action,reward,Response time,Next State:  (10, 11.027107764209074) 3 19.0 962.583328739 (10.995673623987257, 11)
loss 361.13543701171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 362.98480224609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 363.62042236328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 363.95501708984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 362.6734313964844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 361.74078369140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 362.71343994140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 360.64306640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 359.7574157714844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 361.64593505859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 360.8278503417969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 362.9647521972656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 363.9004211425781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 365.00274658203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 363.71209716796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 364.36102294921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 360.7138671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 359.67047119140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 361.5133056640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 363.910400390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 362.3528747558594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 362.1318054199219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 361.2342834472656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 361.79840087890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 360.2184143066406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 360.0332336425781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 362.0045471191406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 361.1094055175781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 364.35113525390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 360.91522216796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 361.55450439453125
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 2 19.0 914.152581784 (10.390165524255663, 11)
loss 363.0042419433594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 360.8699951171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 362.63702392578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 362.23638916015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 360.2266540527344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 359.3644104003906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 360.59722900390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 360.5016174316406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 362.9536437988281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 362.12005615234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 361.4276428222656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 361.36505126953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 361.9795227050781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 360.1180419921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 363.4767150878906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 364.9884948730469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 362.44635009765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 362.12939453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 361.15289306640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 361.52813720703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 361.3194274902344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 362.1011657714844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 359.50494384765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 358.9053955078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 363.1000061035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 360.8335266113281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 360.5520324707031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 360.98150634765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 361.48974609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 360.4202575683594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 361.1528625488281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 362.8438720703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 361.07421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 361.1701354980469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 360.598876953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 362.45941162109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 362.1206970214844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 360.09954833984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 360.2560729980469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 363.6372985839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 361.9461364746094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 362.18829345703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 361.10028076171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 362.5903625488281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 360.64739990234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 359.9520263671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 362.71978759765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 363.6761474609375
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 1 20.0 1204.59090422 (15.892373986997768, 10)
loss 362.8821105957031
Current State,action,reward,Response time,Next State:  (10, 15.892373986997768) 3 19.0 1220.65695786 (15.954793861767499, 11)
loss 361.0666809082031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 363.1073913574219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 362.31756591796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 360.6244812011719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 361.5265808105469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 361.296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 362.74993896484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 360.4465026855469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 360.50201416015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 362.1886901855469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 361.34588623046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 362.5130310058594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 362.510986328125
############ Running episode number: 255  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 2 20.0 1029.06659875 (11.973514343585284, 10)
loss 362.5101623535156
Current State,action,reward,Response time,Next State:  (10, 11.973514343585284) 3 19.0 1012.7846064 (11.786394321941378, 11)
loss 362.24078369140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 361.2994689941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 361.3645324707031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 361.3195495605469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 361.50140380859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 365.9248046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 362.12799072265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 361.4566955566406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 362.30059814453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 362.2890930175781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 362.5152893066406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 362.51458740234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 360.4869384765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 363.2071533203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 361.8714904785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 360.615478515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 359.77581787109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 361.92718505859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 364.71527099609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 361.9815673828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 361.57513427734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 359.939697265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 360.7601318359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 359.85491943359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 362.74334716796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 363.577880859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 360.0818786621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 361.5447082519531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 359.1080017089844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 362.4651794433594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 363.0690612792969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 361.5849609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 360.9631652832031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 364.4635314941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 359.4859924316406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 361.5393371582031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 360.5671081542969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 362.1610412597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 365.2523498535156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 362.7969055175781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 360.9090270996094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 361.7502746582031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 360.73089599609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 361.0589294433594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 364.14495849609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 359.4810485839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 360.6537780761719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 363.0966491699219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 361.8830871582031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 362.7904968261719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 360.34197998046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 360.0612487792969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 359.6598205566406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 364.3485107421875
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 0 21.0 1143.69153516 (15.353965082180355, 9)
loss 360.8738708496094
Current State,action,reward,Response time,Next State:  (9, 15.353965082180355) 3 20.0 1226.82184023 (15.836943704090487, 10)
loss 360.2921142578125
Current State,action,reward,Response time,Next State:  (10, 15.836943704090487) 3 19.0 1217.71670884 (16.466876895473597, 11)
loss 362.447998046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 361.9651184082031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 362.06451416015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 360.19415283203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 361.0017395019531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 359.7303161621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 359.7841491699219
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 1 20.0 1354.73183582 (19.02839494033929, 10)
loss 361.4031982421875
Current State,action,reward,Response time,Next State:  (10, 19.02839494033929) 3 19.0 1387.00434183 (19.286321916040979, 11)
loss 360.1253356933594
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 2 19.0 1387.23260634 (19.340464848017284, 11)
loss 364.90765380859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 362.0472717285156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 361.2568664550781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 363.95867919921875
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 4 19.0 1392.48057747 (19.223969507401588, 11)
loss 360.5506286621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 361.57183837890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 362.0540466308594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 363.888427734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 361.11505126953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 363.4020690917969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 361.4154357910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 361.1551513671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 361.313232421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 360.259765625
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 2 19.0 1210.97093797 (15.828704162850809, 11)
loss 360.21490478515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 360.9811706542969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 363.8511962890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 361.76300048828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 360.2651672363281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 361.3152160644531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 361.9212341308594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 361.701904296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 363.7407531738281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 359.800048828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 361.26666259765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 361.872314453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 362.94390869140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 361.4365234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 361.5708312988281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 361.2592468261719
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 2 19.0 1248.87152463 (16.836383524612351, 11)
loss 362.46539306640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 362.4892883300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 360.53704833984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 364.4483642578125
############ Running episode number: 256  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 363.427490234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 362.99810791015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 361.460205078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 361.40625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 360.309326171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 361.71746826171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 360.98785400390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 362.1536560058594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 362.5753173828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 363.6984558105469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 360.2966003417969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 362.0409851074219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 360.4555969238281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 361.8057861328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 360.6143493652344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 360.9563293457031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 360.6688537597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 363.3147277832031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 361.66900634765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 362.43212890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 360.11767578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 361.8876037597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 362.21099853515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 359.71002197265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 363.1795959472656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 359.9134826660156
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 0 21.0 913.381595845 (10.30224719189987, 9)
loss 361.3211669921875
Current State,action,reward,Response time,Next State:  (9, 10.30224719189987) 3 20.0 962.389338906 (10.278181486298042, 10)
loss 363.0965270996094
Current State,action,reward,Response time,Next State:  (10, 10.278181486298042) 3 19.0 922.857214352 (10.268274366284802, 11)
loss 364.0256652832031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 363.4814453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 360.198974609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 361.8336486816406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 362.049560546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 361.67340087890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 360.90179443359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 361.8254089355469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 360.44891357421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 360.2719421386719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 364.35125732421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 361.396484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 361.10284423828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 362.5481872558594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 360.650146484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 361.3307800292969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 362.1931457519531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 359.6857604980469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 362.4468688964844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 359.27386474609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 359.9957275390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 360.6976013183594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 363.5171203613281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 359.590087890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 360.8465881347656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 360.8249816894531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 360.42559814453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 360.4319763183594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 360.97686767578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 361.57562255859375
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 4 19.0 1238.24711194 (16.871606159345866, 11)
loss 363.8799743652344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 360.0934143066406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 362.95269775390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 360.5537109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 359.7952880859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 360.46661376953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 362.41058349609375
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 1 20.0 1373.60319427 (19.286321916040979, 10)
loss 359.1391906738281
Current State,action,reward,Response time,Next State:  (10, 19.286321916040979) 3 19.0 1400.68584406 (19.340464848017284, 11)
loss 361.1826477050781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 362.1922302246094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 361.69305419921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 360.39764404296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 360.11163330078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 361.521240234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 361.39373779296875
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 1 20.0 1376.52055872 (18.668181536495972, 10)
loss 361.9399719238281
Current State,action,reward,Response time,Next State:  (10, 18.668181536495972) 3 19.0 1367.89714889 (18.375894992990247, 11)
loss 361.6327819824219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 360.8534240722656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 358.8945007324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 360.056396484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 361.3927001953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 360.4615173339844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 360.11273193359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 361.9016418457031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 360.9637451171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 362.10748291015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 360.15057373046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 361.27789306640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 361.3799743652344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 361.7297668457031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 361.6781311035156
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 2 19.0 1213.81868812 (16.017694914042416, 11)
loss 361.7539978027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 359.0166015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 364.4640808105469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 360.0510559082031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 361.275390625
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 4 19.0 1225.69057988 (16.295120821876548, 11)
loss 361.8901672363281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 361.64898681640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 359.8241271972656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 361.6811218261719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 360.7293701171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 359.8988342285156
############ Running episode number: 257  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 361.6099853515625
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 0 21.0 1000.80824137 (11.786394321941378, 9)
loss 359.10760498046875
Current State,action,reward,Response time,Next State:  (9, 11.786394321941378) 3 20.0 1040.0771169 (11.61852219546234, 10)
loss 359.7784423828125
Current State,action,reward,Response time,Next State:  (10, 11.61852219546234) 0 22.0 993.95437024 (11.469111876584304, 8)
loss 361.4539489746094
Current State,action,reward,Response time,Next State:  (8, 11.469111876584304) 3 21.0 1041.35337246 (11.336751742492702, 9)
loss 362.30157470703125
Current State,action,reward,Response time,Next State:  (9, 11.336751742492702) 3 20.0 1016.54054685 (11.25610796929319, 10)
loss 362.63360595703125
Current State,action,reward,Response time,Next State:  (10, 11.25610796929319) 3 19.0 974.730436685 (11.027107764209074, 11)
loss 361.1328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 362.85369873046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 360.5233459472656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 362.5439758300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 363.1052551269531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 359.3548889160156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 360.1716613769531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 364.0415344238281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 361.4044189453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 362.85601806640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 362.0559387207031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 360.2591247558594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 362.28240966796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 361.6110534667969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 359.1944885253906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 359.55487060546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 360.76251220703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 361.7366027832031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 359.6549377441406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 361.5380554199219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 363.27093505859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 362.1394958496094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 358.9775695800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 360.3597106933594
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 4 19.0 914.247384359 (10.305649118067803, 11)
loss 362.29718017578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 360.98211669921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 362.34234619140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 361.2383117675781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 360.4661865234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 359.5853271484375
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 4 19.0 916.069372847 (10.316955310454549, 11)
loss 359.4737243652344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 362.8325500488281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 364.6354675292969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 362.001220703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 361.07073974609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 363.2865905761719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 360.8929748535156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 364.54791259765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 360.2510986328125
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 2 19.0 945.391786838 (11.039747673816453, 11)
loss 361.2422180175781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 362.7746887207031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 360.6164855957031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 361.50341796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 360.71923828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 362.2121887207031
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 4 19.0 1028.70793389 (13.168618569876575, 11)
loss 363.2452697753906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 360.4496765136719
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 2 19.0 1089.37925646 (14.283719188889453, 11)
loss 363.5654602050781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 359.3216552734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 361.84063720703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 363.90399169921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 360.5892333984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 360.8564453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 361.94293212890625
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 0 21.0 1294.6873044 (17.669285735563751, 9)
loss 360.7104797363281
Current State,action,reward,Response time,Next State:  (9, 17.669285735563751) 3 20.0 1348.01745033 (17.944480812078613, 10)
loss 359.71453857421875
Current State,action,reward,Response time,Next State:  (10, 17.944480812078613) 3 19.0 1329.50910109 (18.385807405229915, 11)
loss 363.89385986328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 363.4773864746094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 361.77447509765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 362.9439697265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 359.8330078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 362.1272277832031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 360.30865478515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 360.888916015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 360.3165283203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 362.94012451171875
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 2 19.0 1385.62570908 (19.08360399753829, 11)
loss 359.7727966308594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 361.2012023925781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 361.170654296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 360.2055969238281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 360.8297119140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 361.5564270019531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 364.69158935546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 361.49078369140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 363.5765380859375
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 0 21.0 1204.52470225 (15.550833128512703, 9)
loss 360.5688171386719
Current State,action,reward,Response time,Next State:  (9, 15.550833128512703) 3 20.0 1237.12691092 (15.446694946204717, 10)
loss 364.67559814453125
Current State,action,reward,Response time,Next State:  (10, 15.446694946204717) 3 19.0 1197.01631782 (15.750501603468638, 11)
loss 359.3467712402344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 360.49981689453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 362.54547119140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 363.8221435546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 361.0728454589844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 362.6177062988281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 361.2391662597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 362.1935729980469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 360.61004638671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 364.0712890625
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 4 19.0 1221.34827555 (16.229253414601111, 11)
loss 363.0685119628906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 363.0043029785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 361.48046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 361.32275390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 361.378173828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 360.3695068359375
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 2 19.0 1269.21706044 (17.215992726625572, 11)
loss 360.416259765625
############ Running episode number: 258  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 362.54693603515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 362.8970031738281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 362.7734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 362.4559326171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 362.8223571777344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 359.6313171386719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 361.2350158691406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 359.4283447265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 361.2228698730469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 361.49151611328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 363.6238708496094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 361.7613525390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 361.9321594238281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 362.0706481933594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 360.8754577636719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 362.3351135253906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 360.4009094238281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 363.0288391113281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 359.83697509765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 362.2442321777344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 364.8186950683594
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 4 19.0 919.871906942 (10.370942817486826, 11)
loss 362.9073486328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 361.41534423828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 362.37188720703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 360.904052734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 363.77777099609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 359.6548767089844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 362.269287109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 362.1507263183594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 362.17462158203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 360.9515380859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 362.2044372558594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 361.5107116699219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 359.9812927246094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 360.07855224609375
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 4 19.0 909.008683798 (10.369891240151098, 11)
loss 362.5872802734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 360.2436218261719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 364.09716796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 362.8087158203125
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 0 21.0 917.140709305 (10.425974763084863, 9)
loss 360.5624694824219
Current State,action,reward,Response time,Next State:  (9, 10.425974763084863) 3 20.0 968.865866662 (10.546025383098053, 10)
loss 359.71685791015625
Current State,action,reward,Response time,Next State:  (10, 10.546025383098053) 3 19.0 937.064750655 (10.655373370049301, 11)
loss 360.1599426269531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 362.10601806640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 363.58734130859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 359.0289306640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 361.41888427734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 363.390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 363.549072265625
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 1 20.0 984.787563682 (11.819721938468785, 10)
loss 359.5792236328125
Current State,action,reward,Response time,Next State:  (10, 11.819721938468785) 3 19.0 1004.62682792 (12.19918626616789, 11)
loss 361.40350341796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 363.0950927734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 361.8045959472656
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 0 21.0 1063.96010023 (13.649658108197247, 9)
loss 361.2278137207031
Current State,action,reward,Response time,Next State:  (9, 13.649658108197247) 3 20.0 1137.6097809 (14.283719188889453, 10)
loss 361.926025390625
Current State,action,reward,Response time,Next State:  (10, 14.283719188889453) 3 19.0 1135.32732476 (14.677479537099185, 11)
loss 363.0447082519531
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 0 21.0 1143.69153516 (15.353965082180355, 9)
loss 361.7886657714844
Current State,action,reward,Response time,Next State:  (9, 15.353965082180355) 3 20.0 1226.82184023 (15.836943704090487, 10)
loss 363.9759826660156
Current State,action,reward,Response time,Next State:  (10, 15.836943704090487) 3 19.0 1217.71670884 (16.466876895473597, 11)
loss 360.8470764160156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 362.3688659667969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 363.4713134765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 361.16888427734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 360.56597900390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 360.55474853515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 360.630615234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 363.2220458984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 361.3394470214844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 364.2228698730469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 361.8504333496094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 363.1252136230469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 361.78118896484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 360.13885498046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 360.51080322265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 361.65924072265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 360.62615966796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 359.76544189453125
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 1 20.0 1339.12370397 (17.82724819986867, 10)
loss 361.00421142578125
Current State,action,reward,Response time,Next State:  (10, 17.82724819986867) 3 19.0 1323.29060362 (17.229782241685768, 11)
loss 363.52740478515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 361.2210693359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 361.07098388671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 362.2143859863281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 362.5480651855469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 362.1646423339844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 363.6460876464844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 360.0311584472656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 360.56982421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 360.96063232421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 359.4287414550781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 362.27203369140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 362.6749572753906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 362.330078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 360.71942138671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 361.1212158203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 361.8485107421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 359.6726379394531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 360.8544616699219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 361.4992980957031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 362.54473876953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 362.7900390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 361.61395263671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 364.0627746582031
############ Running episode number: 259  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 362.2261047363281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 364.6612243652344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 360.7604675292969
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 4 19.0 982.049698353 (11.469111876584304, 11)
loss 360.58721923828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 363.50433349609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 361.3304748535156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 361.7828369140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 360.8914794921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 361.18255615234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 362.1646728515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 361.5675964355469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 360.2407531738281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 362.9996643066406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 364.5744934082031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 363.9088134765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 360.09942626953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 361.4476013183594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 361.26751708984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 361.8977355957031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 361.1741943359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 362.4642639160156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 360.0810546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 359.5423583984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 360.07537841796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 361.0337219238281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 362.54022216796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 361.22222900390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 360.4673767089844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 361.0063171386719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 360.388427734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 364.0685729980469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 360.9638671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 359.95703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 359.7386474609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 362.1077575683594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 361.77142333984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 361.284423828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 360.3367614746094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 359.9688720703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 361.8132019042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 362.4497985839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 360.7624206542969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 362.8375549316406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 361.4509582519531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 360.438720703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 363.168212890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 360.0474853515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 362.8460388183594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 361.17315673828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 360.0932922363281
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 1 20.0 1012.73322757 (12.501496275411796, 10)
loss 360.9704284667969
Current State,action,reward,Response time,Next State:  (10, 12.501496275411796) 3 19.0 1040.79092857 (13.168618569876575, 11)
loss 362.303955078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 359.49951171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 363.9000244140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 360.1426696777344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 361.57904052734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 360.8397521972656
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 0 21.0 1204.9600972 (16.466876895473597, 9)
loss 360.9787902832031
Current State,action,reward,Response time,Next State:  (9, 16.466876895473597) 3 20.0 1285.07728144 (16.871606159345866, 10)
loss 363.13543701171875
Current State,action,reward,Response time,Next State:  (10, 16.871606159345866) 3 19.0 1272.5994393 (17.534967586021782, 11)
loss 360.35986328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 361.78936767578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 363.6144714355469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 360.84576416015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 361.7940979003906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 364.0293884277344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 365.35858154296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 359.2550964355469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 363.7161865234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 360.5000305175781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 361.3479309082031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 361.50311279296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 360.4647521972656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 360.3083801269531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 360.56951904296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 362.9128112792969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 362.5018005371094
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 4 19.0 1310.13203606 (17.229782241685768, 11)
loss 359.2694396972656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 360.2057189941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 362.90338134765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 363.4166259765625
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 2 19.0 1210.97093797 (15.828704162850809, 11)
loss 359.8080749511719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 362.2878112792969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 363.6636657714844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 362.25518798828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 361.684814453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 360.3759460449219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 361.5351867675781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 361.07415771484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 360.9400329589844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 360.2529602050781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 360.5156555175781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 359.2589416503906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 361.0756530761719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 363.2959289550781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 359.8271789550781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 361.30743408203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 362.0823669433594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 363.2258605957031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 360.406982421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 360.66351318359375
############ Running episode number: 260  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 361.3307189941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 358.8962097167969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 363.6902770996094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 363.7108459472656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 360.8602294921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 363.3331298828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 361.1691589355469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 359.9716796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 360.2807922363281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 364.0161437988281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 361.73382568359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 365.06610107421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 362.1794128417969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 361.13885498046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 360.451171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 361.4966735839844
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 2 19.0 925.738299342 (10.553846649940214, 11)
loss 361.0283508300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 362.0888366699219
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 2 19.0 922.369964659 (10.448897752470936, 11)
loss 360.63421630859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 364.6167297363281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 361.60260009765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 362.60919189453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 361.7090148925781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 361.68035888671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 363.6998291015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 361.97674560546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 360.3409118652344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 360.1664123535156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 360.89404296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 361.6308288574219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 363.1152648925781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 360.76220703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 360.1293640136719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 360.47296142578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 363.5370178222656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 362.3621520996094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 361.0956115722656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 360.9738464355469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 361.88104248046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 363.8782958984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 361.6322937011719
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 0 21.0 925.376677007 (10.655373370049301, 9)
loss 359.8448791503906
Current State,action,reward,Response time,Next State:  (9, 10.655373370049301) 3 20.0 980.873751654 (10.624473674922116, 10)
loss 362.7065734863281
Current State,action,reward,Response time,Next State:  (10, 10.624473674922116) 3 19.0 941.225969064 (10.771376986314287, 11)
loss 361.75323486328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 362.401611328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 361.7421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 361.334228515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 361.62310791015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 362.1114807128906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 359.8399353027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 360.687255859375
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 4 19.0 1028.70793389 (13.168618569876575, 11)
loss 359.73541259765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 361.3083190917969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 363.05108642578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 362.23333740234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 361.02288818359375
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 4 19.0 1179.43847566 (15.836943704090487, 11)
loss 359.6087341308594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 362.2249450683594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 359.1888732910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 361.3536376953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 360.6616516113281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 359.88372802734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 360.2349853515625
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 4 19.0 1339.64749699 (18.671267839956315, 11)
loss 364.45452880859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 365.3644714355469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 361.39764404296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 362.96014404296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 362.5710754394531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 361.51019287109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 361.72344970703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 360.8680114746094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 363.726806640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 359.50054931640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 359.3717346191406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 362.93280029296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 360.2320556640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 362.37969970703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 362.1762390136719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 360.5401306152344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 361.6827392578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 360.7510681152344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 362.5470275878906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 360.25634765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 361.8369140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 360.4440612792969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 361.4324645996094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 363.54217529296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 362.5768737792969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 360.79937744140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 363.36944580078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 361.1766662597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 361.2555236816406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 361.3771667480469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 362.1430358886719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 362.561767578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 360.4909362792969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 361.1728515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 362.2186584472656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 364.6151123046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 360.968505859375
############ Running episode number: 261  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 361.0359802246094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 361.2172546386719
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 4 19.0 990.920419923 (11.61852219546234, 11)
loss 361.3131103515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 360.4944763183594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 362.11309814453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 362.4427490234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 361.5681457519531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 362.1723327636719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 361.2737121582031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 362.175048828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 362.7901611328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 360.40362548828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 362.12518310546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 360.0048828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 361.3538818359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 361.7093505859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 360.6913146972656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 360.9134521484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 362.378662109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 361.5375061035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 362.4627380371094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 361.5788879394531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 359.0697021484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 362.499267578125
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 4 19.0 917.051082408 (10.344006106602812, 11)
loss 364.5467224121094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 361.6507873535156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 359.9234313964844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 361.52197265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 360.8869323730469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 362.4591979980469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 361.0583801269531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 362.2870178222656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 362.03643798828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 363.103271484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 361.31536865234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 361.6269836425781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 359.0346374511719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 359.4945373535156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 362.94415283203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 362.9613342285156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 361.7563781738281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 362.3006896972656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 360.459716796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 362.54107666015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 360.9026184082031
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 0 21.0 945.391786838 (11.039747673816453, 9)
loss 360.41229248046875
Current State,action,reward,Response time,Next State:  (9, 11.039747673816453) 3 20.0 1000.99384957 (11.271571944085663, 10)
loss 361.1546325683594
Current State,action,reward,Response time,Next State:  (10, 11.271571944085663) 3 19.0 975.550709187 (11.670334358779868, 11)
loss 361.3377685546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 363.7754821777344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 362.96307373046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 362.64520263671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 364.82830810546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 361.4012451171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 362.2972106933594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 360.55126953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 360.15789794921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 359.634521484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 360.1117248535156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 364.307861328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 362.2734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 361.60870361328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 361.41650390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 359.0622253417969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 360.0667419433594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 361.3448181152344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 361.12451171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 360.2173156738281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 360.9390869140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 360.8630065917969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 362.91473388671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 361.3906555175781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 359.95233154296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 363.1893005371094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 360.66900634765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 362.0486145019531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 360.6609191894531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 362.5894775390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 361.3445739746094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 363.6496887207031
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 1 20.0 1226.10492247 (15.950694610794756, 10)
loss 361.11834716796875
Current State,action,reward,Response time,Next State:  (10, 15.950694610794756) 3 19.0 1223.7505224 (15.828704162850809, 11)
loss 363.32611083984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 360.1819152832031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 359.3994445800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 359.64654541015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 359.6672668457031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 360.0638427734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 360.8363952636719
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 0 21.0 1207.88915169 (15.954793861767499, 9)
loss 362.25286865234375
Current State,action,reward,Response time,Next State:  (9, 15.954793861767499) 3 20.0 1258.27226176 (16.004586266677634, 10)
loss 361.5439147949219
Current State,action,reward,Response time,Next State:  (10, 16.004586266677634) 3 19.0 1226.60915635 (16.017694914042416, 11)
loss 362.4500427246094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 361.8830871582031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 361.4491271972656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 360.2897644042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 360.1419372558594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 363.03912353515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 365.15576171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 359.9796447753906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 361.7821960449219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 363.79388427734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 362.5941467285156
############ Running episode number: 262  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 361.4864196777344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 362.14697265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 362.4128723144531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 361.09820556640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 361.1356201171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 361.0557861328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 363.6994323730469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 362.3851318359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 361.5021057128906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 360.54608154296875
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 1 20.0 939.691239608 (10.819208572963639, 10)
loss 361.3467102050781
Current State,action,reward,Response time,Next State:  (10, 10.819208572963639) 1 21.0 951.555504911 (10.768325938188134, 9)
loss 362.5940856933594
Current State,action,reward,Response time,Next State:  (9, 10.768325938188134) 3 20.0 986.786261176 (10.772009508959538, 10)
loss 360.2764892578125
Current State,action,reward,Response time,Next State:  (10, 10.772009508959538) 3 19.0 949.051873418 (10.644925616761762, 11)
loss 363.16241455078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 361.47808837890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 363.3050842285156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 360.95513916015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 361.8486633300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 362.2091064453125
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 2 19.0 920.244245637 (10.433149880183072, 11)
loss 359.4263916015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 360.8257141113281
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 2 19.0 919.871906942 (10.370942817486826, 11)
loss 361.8594665527344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 361.80078125
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 2 19.0 919.104778396 (10.388469398680568, 11)
loss 362.4537658691406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 362.146728515625
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 4 19.0 914.701547126 (10.319026962956018, 11)
loss 359.80206298828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 361.3478088378906
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 4 19.0 912.494916918 (10.278181486298042, 11)
loss 360.8740234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 362.0855712890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 361.1601867675781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 361.5320129394531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 360.57421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 361.55499267578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 361.9800109863281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 363.6737060546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 359.99810791015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 362.3314208984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 360.3896484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 360.5513916015625
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 0 21.0 917.140709305 (10.425974763084863, 9)
loss 360.6753845214844
Current State,action,reward,Response time,Next State:  (9, 10.425974763084863) 3 20.0 968.865866662 (10.546025383098053, 10)
loss 361.5569152832031
Current State,action,reward,Response time,Next State:  (10, 10.546025383098053) 3 19.0 937.064750655 (10.655373370049301, 11)
loss 363.9541015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 362.8886413574219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 361.3402099609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 364.5022277832031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 361.82647705078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 361.00970458984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 360.2152099609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 361.9391784667969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 360.16021728515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 362.1316833496094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 361.087158203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 361.19818115234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 361.17919921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 361.9236145019531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 362.7416076660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 359.68218994140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 363.3240966796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 362.0982666015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 363.9305114746094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 361.8852844238281
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 0 21.0 1301.78496219 (17.944480812078613, 9)
loss 363.1881103515625
Current State,action,reward,Response time,Next State:  (9, 17.944480812078613) 3 20.0 1362.4225545 (18.385807405229915, 10)
loss 363.0133972167969
Current State,action,reward,Response time,Next State:  (10, 18.385807405229915) 3 19.0 1352.9188695 (18.671267839956315, 11)
loss 361.11541748046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 359.3322448730469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 361.58526611328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 362.89544677734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 362.68621826171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 361.5077209472656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 361.4399719238281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 361.3481750488281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 362.3493347167969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 360.5697937011719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 360.7698974609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 361.35382080078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 361.13458251953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 361.6797790527344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 362.4629211425781
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 1 20.0 1258.07554888 (16.237094554670044, 10)
loss 362.77130126953125
Action +2 not possible so Scaled up by 1
Current State,action,reward,Response time,Next State:  (10, 16.237094554670044) 4 19.0 1238.94234737 (15.950694610794756, 11)
loss 363.05657958984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 359.63568115234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 361.4712829589844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 362.1359558105469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 363.9229736328125
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 0 21.0 1200.39231205 (15.817158911312735, 9)
loss 360.7149353027344
Current State,action,reward,Response time,Next State:  (9, 15.817158911312735) 3 20.0 1251.06775133 (15.829956988360925, 10)
loss 364.5413513183594
Current State,action,reward,Response time,Next State:  (10, 15.829956988360925) 3 19.0 1217.34610485 (15.892373986997768, 11)
loss 362.27423095703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 362.72314453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 364.9925537109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 361.189208984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 362.32928466796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 361.86151123046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 361.29852294921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 361.040283203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 363.43994140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 362.05950927734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 361.7000732421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 360.4965515136719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 360.1376037597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 363.6759948730469
############ Running episode number: 263  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 359.8770751953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 361.3653259277344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 362.1187438964844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 362.8667297363281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 363.3710632324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 362.5375671386719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 359.7265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 361.9226379394531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 360.4102783203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 361.74505615234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 363.48394775390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 360.0010070800781
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 2 19.0 937.12351295 (10.772009508959538, 11)
loss 362.1376647949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 360.4388732910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 360.6759033203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 361.16278076171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 362.6424560546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 363.46112060546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 363.68560791015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 362.0907287597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 360.09820556640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 362.320556640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 362.11700439453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 361.1087951660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 361.52264404296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 359.9831848144531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 360.9454650878906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 362.729736328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 362.39813232421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 362.65753173828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 363.6641845703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 363.2760314941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 360.37518310546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 361.8004455566406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 360.9578552246094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 361.6947937011719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 360.814697265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 362.34356689453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 362.5003662109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 361.33819580078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 363.6492919921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 363.53106689453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 363.2647399902344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 361.3435974121094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 361.03466796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 360.894287109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 364.558349609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 362.32733154296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 360.2854919433594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 360.2318420410156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 363.5489196777344
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 2 19.0 1028.70793389 (13.168618569876575, 11)
loss 360.48193359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 362.3888244628906
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 0 21.0 1089.37925646 (14.283719188889453, 9)
loss 361.0099182128906
Current State,action,reward,Response time,Next State:  (9, 14.283719188889453) 3 20.0 1170.79974938 (14.677479537099185, 10)
loss 363.3916015625
Current State,action,reward,Response time,Next State:  (10, 14.677479537099185) 3 19.0 1156.21398489 (15.353965082180355, 11)
loss 362.14276123046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 361.9428405761719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 362.7261047363281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 360.6734313964844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 360.960693359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 359.56634521484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 363.64544677734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 361.3730163574219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 363.2698974609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 363.2743225097656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 362.84454345703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 360.8929748535156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 361.53277587890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 363.7760009765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 363.9190368652344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 362.6978454589844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 361.1815490722656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 360.5458679199219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 363.1483154296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 361.6585388183594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 360.8081970214844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 362.7043151855469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 364.6316223144531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 360.1151123046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 361.8463439941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 360.6474609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 363.8895263671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 362.54681396484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 362.36688232421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 361.1183776855469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 361.4480285644531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 360.83502197265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 360.1214904785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 361.39215087890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 364.0621337890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 363.5086975097656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 360.6155090332031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 362.0124816894531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 363.7583312988281
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 2 19.0 1225.69057988 (16.295120821876548, 11)
loss 360.308837890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 362.5706481933594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 359.8765563964844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 361.090087890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 363.7969970703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 362.63714599609375
############ Running episode number: 264  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 362.2697448730469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 361.439453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 361.3441467285156
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 0 21.0 982.049698353 (11.469111876584304, 9)
loss 360.1735534667969
Current State,action,reward,Response time,Next State:  (9, 11.469111876584304) 3 20.0 1023.46894667 (11.336751742492702, 10)
loss 361.5389404296875
Current State,action,reward,Response time,Next State:  (10, 11.336751742492702) 3 19.0 979.00811241 (11.25610796929319, 11)
loss 362.17364501953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 363.7609558105469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 359.31390380859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 359.7598571777344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 360.2377624511719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 361.0308837890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 363.87322998046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 360.9427185058594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 362.30548095703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 363.2986145019531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 363.3293151855469
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 2 19.0 925.738299342 (10.553846649940214, 11)
loss 359.58642578125
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 2 19.0 925.789969445 (10.489125480251131, 11)
loss 363.0562744140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 362.6612548828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 359.4710388183594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 362.8407287597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 359.9440612792969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 360.5597839355469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 361.7508544921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 360.6211853027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 359.848388671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 360.5381164550781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 361.7270202636719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 362.4325256347656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 361.1378479003906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 362.2861328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 361.83782958984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 359.5291442871094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 361.0054626464844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 360.9942321777344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 359.6897888183594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 360.129150390625
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 1 20.0 913.272125304 (10.333617326102203, 10)
loss 359.4017333984375
Current State,action,reward,Response time,Next State:  (10, 10.333617326102203) 3 19.0 925.797758139 (10.390165524255663, 11)
loss 363.3320007324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 363.27978515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 360.79925537109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 363.1752014160156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 361.3482360839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 362.4238586425781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 360.5674743652344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 361.7036437988281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 363.76300048828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 361.4326477050781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 361.5054016113281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 360.15673828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 359.991455078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 364.05413818359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 361.6948547363281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 360.89666748046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 362.3202209472656
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 0 21.0 1143.69153516 (15.353965082180355, 9)
loss 360.4307556152344
Current State,action,reward,Response time,Next State:  (9, 15.353965082180355) 3 20.0 1226.82184023 (15.836943704090487, 10)
loss 360.53631591796875
Current State,action,reward,Response time,Next State:  (10, 15.836943704090487) 3 19.0 1217.71670884 (16.466876895473597, 11)
loss 361.0593566894531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 361.1607971191406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 360.9263610839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 360.83941650390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 361.36273193359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 358.748779296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 361.3719787597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 363.0858154296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 363.7426452636719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 361.59576416015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 360.86871337890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 362.1827087402344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 360.3374328613281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 360.7341003417969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 359.9240417480469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 361.4329528808594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 362.40960693359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 363.0357971191406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 359.3188171386719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 363.2431945800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 361.8398132324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 361.73687744140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 362.1921081542969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 360.1462707519531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 361.43548583984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 361.7497253417969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 362.7259521484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 363.1342468261719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 363.58538818359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 362.5077819824219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 359.9952087402344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 362.5497741699219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 363.561767578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 361.11334228515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 358.85906982421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 359.7351379394531
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 2 19.0 1221.34827555 (16.229253414601111, 11)
loss 361.78448486328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 359.9798278808594
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 4 19.0 1229.17115431 (16.667936385136993, 11)
loss 363.2901611328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 363.8828430175781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 361.6822204589844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 361.0890808105469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 362.1838073730469
############ Running episode number: 265  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 362.6751403808594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 362.8479309082031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 361.3103332519531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 362.01239013671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 360.6819152832031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 359.19085693359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 363.1256408691406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 359.8908996582031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 362.1112365722656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 361.7054138183594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 364.04632568359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 362.15838623046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 361.8516540527344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 363.1869812011719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 364.170654296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 362.22467041015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 361.05120849609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 360.7370300292969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 363.1392517089844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 359.6441345214844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 359.214111328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 360.7261047363281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 360.8399658203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 361.8852844238281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 360.9297790527344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 361.88970947265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 362.5297546386719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 361.1343688964844
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 2 19.0 911.223233653 (10.268274366284802, 11)
loss 361.8176574707031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 361.04632568359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 362.5766906738281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 360.1834411621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 361.80242919921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 358.9256591796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 359.6667785644531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 360.7691955566406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 363.00653076171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 361.00677490234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 361.1171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 360.9276123046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 364.2190246582031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 362.61114501953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 360.5080871582031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 361.1380920410156
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 4 19.0 937.284736847 (10.924797168745895, 11)
loss 359.2606201171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 364.13690185546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 362.2872314453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 361.7361755371094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 361.7344665527344
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 2 19.0 992.681522335 (12.19918626616789, 11)
loss 360.4630432128906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 363.13775634765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 361.1483154296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 360.9561767578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 360.3887939453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 362.49530029296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 364.8475646972656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 362.7703552246094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 360.3684997558594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 359.47625732421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 359.5427551269531
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 0 21.0 1294.6873044 (17.669285735563751, 9)
loss 361.5875244140625
Current State,action,reward,Response time,Next State:  (9, 17.669285735563751) 3 20.0 1348.01745033 (17.944480812078613, 10)
loss 360.9129333496094
Current State,action,reward,Response time,Next State:  (10, 17.944480812078613) 3 19.0 1329.50910109 (18.385807405229915, 11)
loss 360.5876159667969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 359.6380920410156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 361.1424255371094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 361.7871398925781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 365.0493469238281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 360.364501953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 362.4659118652344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 361.28692626953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 361.2832336425781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 362.4171447753906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 363.15704345703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 360.3197021484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 361.9931640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 362.55548095703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 362.81707763671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 360.21075439453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 361.2129821777344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 362.5819091796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 362.99072265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 361.1800842285156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 360.8443908691406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 361.7669677734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 362.4324645996094
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 4 19.0 1203.91462651 (15.829956988360925, 11)
loss 365.0261535644531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 360.89453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 364.7261657714844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 360.79742431640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 361.64141845703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 361.6991271972656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 360.8570861816406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 361.4904479980469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 361.0509948730469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 362.48284912109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 361.9939880371094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 362.6976318359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 360.2020263671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 361.2767028808594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 360.31475830078125
############ Running episode number: 266  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 361.3018798828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 363.1156921386719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 362.7528076171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 360.00897216796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 360.2342529296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 361.1018371582031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 359.92291259765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 361.2882080078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 361.2672424316406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 361.0057067871094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 363.0372314453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 360.9866638183594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 363.9311828613281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 361.09442138671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 359.5975341796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 362.9329833984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 363.0054016113281
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 1 20.0 925.789969445 (10.489125480251131, 10)
loss 360.1031188964844
Current State,action,reward,Response time,Next State:  (10, 10.489125480251131) 3 19.0 934.046546974 (10.448897752470936, 11)
loss 363.4172058105469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 358.9421691894531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 362.3970642089844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 360.63525390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 361.0095520019531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 359.873291015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 360.330322265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 359.8045959472656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 363.69097900390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 361.5133361816406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 361.1414489746094
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 0 21.0 910.69972028 (10.335411397720526, 9)
loss 359.8363952636719
Current State,action,reward,Response time,Next State:  (9, 10.335411397720526) 3 20.0 964.125321415 (10.305649118067803, 10)
loss 360.4207458496094
Current State,action,reward,Response time,Next State:  (10, 10.305649118067803) 3 19.0 924.314209939 (10.24826025489064, 11)
loss 360.4247131347656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 361.02996826171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 361.70855712890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 360.61309814453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 360.2197265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 359.6049499511719
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 0 21.0 913.272125304 (10.333617326102203, 9)
loss 361.6429138183594
Current State,action,reward,Response time,Next State:  (9, 10.333617326102203) 3 20.0 964.03141062 (10.390165524255663, 10)
loss 361.14520263671875
Current State,action,reward,Response time,Next State:  (10, 10.390165524255663) 3 19.0 928.797305964 (10.425974763084863, 11)
loss 362.5621337890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 360.1702575683594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 364.5095520019531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 361.1590270996094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 361.66949462890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 363.7208251953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 358.80938720703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 359.8072509765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 360.3577880859375
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 2 19.0 984.787563682 (11.819721938468785, 11)
loss 362.4832458496094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 361.07122802734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 360.5126647949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 363.06951904296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 363.5332946777344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 360.51190185546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 360.0968017578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 362.67596435546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 360.380126953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 360.5497741699219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 361.4513244628906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 362.1036682128906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 360.48193359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 360.5344543457031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 362.4937438964844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 360.09881591796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 360.0122985839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 361.7301940917969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 361.5057373046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 360.3811340332031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 361.6446533203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 363.7185974121094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 361.1815185546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 362.12249755859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 362.300537109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 363.2128601074219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 362.2951354980469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 359.3107604980469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 362.332275390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 360.6072082519531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 361.2769775390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 360.6103820800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 363.7826232910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 361.36065673828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 359.9507141113281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 360.2531433105469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 360.8749084472656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 359.0196228027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 362.67138671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 363.2763671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 362.3657531738281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 361.92901611328125
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 0 21.0 1214.51137704 (15.947547279389703, 9)
loss 363.3245849609375
Current State,action,reward,Response time,Next State:  (9, 15.947547279389703) 3 20.0 1257.89293893 (16.11465619633363, 10)
loss 361.0848083496094
Current State,action,reward,Response time,Next State:  (10, 16.11465619633363) 3 19.0 1232.44771583 (16.147078378791146, 11)
loss 361.1709289550781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 363.1858215332031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 361.31732177734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 364.2750549316406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 360.2734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 361.3039855957031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 361.58831787109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 361.2460632324219
############ Running episode number: 267  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 361.6175537109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 359.8359069824219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 359.9599304199219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 362.2265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 361.5558776855469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 360.3336486816406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 363.16357421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 360.1754150390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 362.4244689941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 361.2624206542969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 359.5075988769531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 362.4675598144531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 363.5019226074219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 360.604248046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 362.44732666015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 359.3520202636719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 360.44171142578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 361.2483215332031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 361.4429626464844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 363.0162048339844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 360.7408447265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 359.0880432128906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 362.412841796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 361.4134826660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 360.1401062011719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 359.75347900390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 360.00787353515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 361.3057861328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 361.2123107910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 362.41650390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 362.2803039550781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 360.9775390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 359.78204345703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 359.816650390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 360.1166687011719
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 1 20.0 909.008683798 (10.369891240151098, 10)
loss 362.0066223144531
Current State,action,reward,Response time,Next State:  (10, 10.369891240151098) 3 19.0 927.721874973 (10.316955310454549, 11)
loss 359.3716125488281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 361.849609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 362.7106018066406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 362.2268371582031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 360.37884521484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 360.9949035644531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 363.0404052734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 363.50958251953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 359.383056640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 359.6799621582031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 360.82379150390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 360.33941650390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 360.3106384277344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 363.6485900878906
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 4 19.0 1012.73322757 (12.501496275411796, 11)
loss 363.4925537109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 361.3233642578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 364.7257080078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 359.6085510253906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 359.923095703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 360.0755615234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 362.7330627441406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 361.7589416503906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 361.59930419921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 362.8852844238281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 360.80596923828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 359.4585266113281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 359.4822082519531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 361.7615661621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 360.5341491699219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 362.1598815917969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 360.7222595214844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 361.5046691894531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 361.2165832519531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 361.2625427246094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 359.9404296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 360.1423034667969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 362.7038269042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 361.6136474609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 359.0784912109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 361.6158447265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 360.61590576171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 359.806884765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 362.96685791015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 359.5475769042969
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 2 19.0 1210.97093797 (15.828704162850809, 11)
loss 360.5020446777344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 363.07916259765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 359.2778625488281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 360.8962707519531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 361.1187438964844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 364.54693603515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 361.35321044921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 361.0814208984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 361.79107666015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 362.404052734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 361.97247314453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 359.52178955078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 362.14495849609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 360.3207702636719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 359.2000427246094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 359.6776123046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 361.6050720214844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 361.7884521484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 362.33746337890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 361.78839111328125
############ Running episode number: 268  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 360.7326354980469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 361.7933349609375
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 1 20.0 990.920419923 (11.61852219546234, 10)
loss 360.8438415527344
Current State,action,reward,Response time,Next State:  (10, 11.61852219546234) 3 19.0 993.95437024 (11.469111876584304, 11)
loss 360.0448303222656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 363.1552734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 362.6784362792969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 365.368896484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 360.923095703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 360.686279296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 360.6820373535156
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 2 19.0 939.691239608 (10.819208572963639, 11)
loss 360.0530700683594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 361.7757568359375
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 4 19.0 937.12351295 (10.772009508959538, 11)
loss 360.1990966796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 362.37213134765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 361.718505859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 363.05487060546875
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 0 21.0 925.738299342 (10.553846649940214, 9)
loss 360.1270751953125
Current State,action,reward,Response time,Next State:  (9, 10.553846649940214) 3 20.0 975.559328891 (10.489125480251131, 10)
loss 361.6915588378906
Current State,action,reward,Response time,Next State:  (10, 10.489125480251131) 3 19.0 934.046546974 (10.448897752470936, 11)
loss 361.04150390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 362.8500061035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 359.08251953125
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 0 21.0 919.871906942 (10.370942817486826, 9)
loss 363.5094299316406
Current State,action,reward,Response time,Next State:  (9, 10.370942817486826) 3 20.0 965.985215893 (10.42733414151318, 10)
loss 359.7364807128906
Current State,action,reward,Response time,Next State:  (10, 10.42733414151318) 3 19.0 930.768881517 (10.388469398680568, 11)
loss 364.054931640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 361.1076354980469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 361.1628112792969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 360.6221008300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 359.1296081542969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 360.9469299316406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 360.31524658203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 361.1750183105469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 364.6851806640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 361.36212158203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 363.33465576171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 361.9674072265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 360.9693603515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 362.84735107421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 363.59716796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 362.5129089355469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 362.1836242675781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 362.18939208984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 361.1954040527344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 361.21746826171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 362.4469299316406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 361.2703552246094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 360.71868896484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 361.38018798828125
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 1 20.0 963.716106332 (11.670334358779868, 10)
loss 359.4560852050781
Current State,action,reward,Response time,Next State:  (10, 11.670334358779868) 3 19.0 996.702699398 (11.819721938468785, 11)
loss 360.9316711425781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 363.8309326171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 361.462646484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 360.11431884765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 359.7621154785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 360.0023498535156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 359.9730224609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 362.2870788574219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 360.6539611816406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 360.19110107421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 358.52764892578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 360.02935791015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 360.2195129394531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 358.9927978515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 360.6028137207031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 362.11065673828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 359.6056823730469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 361.2883605957031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 359.8894958496094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 361.35699462890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 361.8345031738281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 361.7432861328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 362.7319641113281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 359.82635498046875
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 4 19.0 1385.62570908 (19.08360399753829, 11)
loss 360.6827697753906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 360.44097900390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 360.9626770019531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 360.3275146484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 360.6316223144531
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 1 20.0 1278.56065924 (16.84211602880065, 10)
loss 360.3958740234375
Current State,action,reward,Response time,Next State:  (10, 16.84211602880065) 3 19.0 1271.03516211 (16.237094554670044, 11)
loss 360.1614685058594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 363.2831726074219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 360.9050598144531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 361.2514343261719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 362.25177001953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 360.338623046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 360.6182861328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 361.3971862792969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 361.6581726074219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 363.3616943359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 359.5330505371094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 361.3102111816406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 361.5423583984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 361.03350830078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 361.7781677246094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 360.04962158203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 363.8865661621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 360.8643493652344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 360.3207702636719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 361.8575439453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 361.5223083496094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 359.8707580566406
############ Running episode number: 269  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 359.485595703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 361.09600830078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 360.2567443847656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 360.8506774902344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 360.26141357421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 361.44195556640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 359.8868408203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 361.51922607421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 360.6021423339844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 360.7045593261719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 360.17864990234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 359.5841369628906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 362.0260925292969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 359.3837890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 360.5449523925781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 360.2648010253906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 361.22674560546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 363.6183776855469
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 1 20.0 922.369964659 (10.448897752470936, 10)
loss 362.7666931152344
Current State,action,reward,Response time,Next State:  (10, 10.448897752470936) 3 19.0 931.912703681 (10.433149880183072, 11)
loss 361.91680908203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 360.0819091796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 361.1653747558594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 361.453857421875
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 1 20.0 919.104778396 (10.388469398680568, 10)
loss 362.6629638671875
Current State,action,reward,Response time,Next State:  (10, 10.388469398680568) 3 19.0 928.707336523 (10.344006106602812, 11)
loss 360.9269104003906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 360.3727111816406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 362.3199768066406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 362.2222900390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 363.0571594238281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 363.9788818359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 360.0513000488281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 359.80078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 362.6689147949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 359.66229248046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 359.2533874511719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 361.6589050292969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 363.8658752441406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 359.5636901855469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 360.7243957519531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 360.0733337402344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 361.582763671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 360.3155212402344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 360.6424255371094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 360.8860168457031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 362.5155029296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 361.0987548828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 360.7388916015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 360.45513916015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 361.8587341308594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 360.93353271484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 362.94146728515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 362.8392028808594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 361.1932678222656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 361.1940612792969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 362.4206237792969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 361.70355224609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 359.7455749511719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 360.17401123046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 359.6695861816406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 361.36328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 360.3870849609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 360.7255859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 360.292724609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 360.74444580078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 361.79766845703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 361.2626037597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 361.1047058105469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 360.4889831542969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 363.4232482910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 365.30938720703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 361.491455078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 360.37969970703125
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 4 19.0 1385.62570908 (19.08360399753829, 11)
loss 361.6138916015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 360.8548278808594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 360.89263916015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 360.9328308105469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 359.9244079589844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 361.1390686035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 362.17999267578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 363.07684326171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 359.197998046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 362.6002197265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 360.6423034667969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 361.6107482910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 362.0412902832031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 359.79486083984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 361.7965393066406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 361.1596374511719
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 2 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.77703857421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 362.8829650878906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 363.37689208984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 360.1357116699219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 361.39239501953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 361.3562316894531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 359.91595458984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 361.656982421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 362.04620361328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 360.4460754394531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 360.0962829589844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 360.32269287109375
############ Running episode number: 270  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 361.31890869140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 361.6449890136719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 359.9912109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 361.4262390136719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 362.0897216796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 360.8411560058594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 361.650634765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 360.1703796386719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 360.7693176269531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 360.4603271484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 359.4696960449219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 360.3569030761719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 361.8648376464844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 361.0015869140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 361.68084716796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 359.6829833984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 360.8268737792969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 362.0196838378906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 361.7643127441406
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 1 20.0 920.244245637 (10.433149880183072, 10)
loss 359.150146484375
Current State,action,reward,Response time,Next State:  (10, 10.433149880183072) 3 19.0 931.077372094 (10.44185150623065, 11)
loss 362.0211181640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 361.5564880371094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 363.3022155761719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 361.3719177246094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 360.2562255859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 360.73797607421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 361.29541015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 362.81005859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 361.247802734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 363.1167297363281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 361.6947937011719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 361.8583068847656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 359.9356384277344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 361.29150390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 361.0919189453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 361.19873046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 362.1283264160156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 361.6216735839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 362.0946350097656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 359.661376953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 360.42938232421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 362.0865173339844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 361.7374267578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 361.552001953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 359.3878173828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 359.9261474609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 361.63897705078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 360.0624694824219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 360.6653137207031
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 2 19.0 992.681522335 (12.19918626616789, 11)
loss 360.8600769042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 362.69049072265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 360.79949951171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 359.96722412109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 363.03521728515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 361.3700256347656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 360.7857971191406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 361.8717956542969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 363.6081848144531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 360.32745361328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 359.7644348144531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 359.3294372558594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 360.2562255859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 361.528076171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 359.3338623046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 361.812744140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 360.560791015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 360.9364013671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 363.1477355957031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 361.18182373046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 361.19561767578125
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 1 20.0 1392.48057747 (19.223969507401588, 10)
loss 359.5463562011719
Current State,action,reward,Response time,Next State:  (10, 19.223969507401588) 3 19.0 1397.37841716 (19.25591252280865, 11)
loss 360.89031982421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 359.50701904296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 360.92755126953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 360.8878479003906
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 4 19.0 1339.12370397 (17.82724819986867, 11)
loss 360.6591796875
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 0 21.0 1310.13203606 (17.229782241685768, 9)
loss 360.6300048828125
Current State,action,reward,Response time,Next State:  (9, 17.229782241685768) 3 20.0 1325.01161138 (16.84211602880065, 10)
loss 363.0916442871094
Current State,action,reward,Response time,Next State:  (10, 16.84211602880065) 3 19.0 1271.03516211 (16.237094554670044, 11)
loss 363.6009826660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 360.4559631347656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 361.0943908691406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 361.67919921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 361.3265075683594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 360.4041442871094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 361.2664489746094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 359.7413024902344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 360.98883056640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 360.2119445800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 363.91357421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 362.2898254394531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 362.41998291015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 360.6751708984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 359.9922180175781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 361.55804443359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 361.0726623535156
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 0 21.0 1229.17115431 (16.667936385136993, 9)
loss 361.7763977050781
Current State,action,reward,Response time,Next State:  (9, 16.667936385136993) 3 20.0 1295.6017535 (16.836383524612351, 10)
loss 359.0736999511719
Current State,action,reward,Response time,Next State:  (10, 16.836383524612351) 3 19.0 1270.73108663 (16.845818065953559, 11)
loss 360.29888916015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 362.1002502441406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 363.8341064453125
############ Running episode number: 271  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 362.6310119628906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 361.01068115234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 361.0941162109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 362.9918518066406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 361.12396240234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 360.8502197265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 360.7760314941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 361.9830017089844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 359.01373291015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 361.3341369628906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 359.9473876953125
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 0 21.0 939.812260006 (10.768325938188134, 9)
loss 362.6186218261719
Current State,action,reward,Response time,Next State:  (9, 10.768325938188134) 3 20.0 986.786261176 (10.772009508959538, 10)
loss 361.2549743652344
Current State,action,reward,Response time,Next State:  (10, 10.772009508959538) 3 19.0 949.051873418 (10.644925616761762, 11)
loss 363.1494445800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 360.24969482421875
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 0 21.0 927.560809977 (10.552868829802469, 9)
loss 360.3496398925781
Current State,action,reward,Response time,Next State:  (9, 10.552868829802469) 3 20.0 975.508144832 (10.553846649940214, 10)
loss 361.8698425292969
Current State,action,reward,Response time,Next State:  (10, 10.553846649940214) 3 19.0 937.479622653 (10.489125480251131, 11)
loss 360.8070983886719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 361.9750061035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 361.1393737792969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 359.5003967285156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 361.0431823730469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 361.3255615234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 361.50732421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 360.9053955078125
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 1 20.0 914.701547126 (10.319026962956018, 10)
loss 360.8633728027344
Current State,action,reward,Response time,Next State:  (10, 10.319026962956018) 3 19.0 925.023825574 (10.30224719189987, 11)
loss 362.51153564453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 362.3832092285156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 360.81378173828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 362.34686279296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 359.313720703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 360.4393005371094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 360.48602294921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 361.83905029296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 361.9971923828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 359.9820556640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 359.8615417480469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.96923828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 360.77789306640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 361.10308837890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 361.6039733886719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 360.7253112792969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 360.3191223144531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 361.0784606933594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 359.70458984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 360.9897155761719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 363.5179443359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 362.25445556640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 364.1421203613281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 359.5291748046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 364.3092346191406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 359.7967529296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 360.7349548339844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 361.6789245605469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 360.91064453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 361.0928955078125
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 4 19.0 1179.43847566 (15.836943704090487, 11)
loss 360.3326110839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 363.9268493652344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 362.09368896484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 360.2535705566406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 363.28607177734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 360.443115234375
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 1 20.0 1316.32685758 (18.385807405229915, 10)
loss 360.6379089355469
Current State,action,reward,Response time,Next State:  (10, 18.385807405229915) 3 19.0 1352.9188695 (18.671267839956315, 11)
loss 363.8370666503906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 363.0862121582031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 359.9083251953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 361.7406921386719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 361.50152587890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 361.2143859863281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 362.2439880371094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 361.1908264160156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 362.2972106933594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 362.2930603027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 362.1414794921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 359.4378967285156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 361.4169616699219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 360.82025146484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 363.5986022949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 361.1290588378906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 361.641845703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 360.2698974609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 359.28936767578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 361.7153625488281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 359.6973571777344
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 2 19.0 1200.39231205 (15.817158911312735, 11)
loss 361.1172790527344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 360.9837646484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 364.08514404296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 362.6151123046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 359.83465576171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 363.75787353515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 360.1232604980469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 363.97802734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 359.8254699707031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 362.27734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 363.1919250488281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 361.4482116699219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 362.4689636230469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 361.9798889160156
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 1 20.0 1258.27117243 (17.052961248403161, 10)
loss 362.6114196777344
Current State,action,reward,Response time,Next State:  (10, 17.052961248403161) 3 19.0 1282.21925533 (17.215992726625572, 11)
loss 360.1382141113281
############ Running episode number: 272  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 360.52508544921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 359.138916015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 358.71966552734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 362.45013427734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 362.01837158203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 360.7305908203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 360.63262939453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 363.41937255859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 360.7455749511719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 361.2895202636719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 360.63275146484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 360.8488464355469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 360.1540832519531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 362.1299133300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 361.8974304199219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 359.9752502441406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 360.7982482910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 360.45135498046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 360.89520263671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 360.02886962890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 361.64129638671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 359.2143249511719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 359.64678955078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 361.1953430175781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 362.1188659667969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 362.9248046875
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 4 19.0 913.381595845 (10.30224719189987, 11)
loss 362.593505859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 361.8913269042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 360.8687438964844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 361.0196228027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 359.9082336425781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 360.2073669433594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 361.2719421386719
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 2 19.0 911.133954163 (10.236991269871366, 11)
loss 362.5164794921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 364.9619445800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 362.11859130859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 362.38592529296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 360.0617980957031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 361.8765869140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 359.3719787597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 362.2002868652344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 361.9188232421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 361.448486328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 360.4402160644531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 361.8598937988281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 362.4330749511719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 360.564453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 361.749755859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 362.6496887207031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 360.67755126953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 360.8960266113281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 360.2196350097656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 363.4560241699219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 362.56671142578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 360.7583312988281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 363.32403564453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 360.2235412597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 360.5178527832031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 360.09967041015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 360.7805480957031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 362.1195983886719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 359.7400817871094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 360.12945556640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 360.32806396484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 360.2666015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 361.31109619140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 360.63214111328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 360.3191833496094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 362.8280029296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 361.05584716796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 360.8135070800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 359.6114807128906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 361.5145263671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 361.1752624511719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 361.3977355957031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 362.0506591796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 361.18328857421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 360.1863708496094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 362.1922302246094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 359.91082763671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 359.83612060546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 359.9079895019531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 359.5723876953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 360.0318603515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 363.7529602050781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 361.11627197265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 361.8913879394531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 360.9631652832031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 359.0380859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 362.1894836425781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 360.85211181640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 362.8662414550781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 359.6646423339844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 361.2607727050781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 362.01416015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 363.28643798828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 361.65728759765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 362.7447509765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 362.52490234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 360.731201171875
############ Running episode number: 273  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 361.00457763671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 361.01007080078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 361.40631103515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 362.17431640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 360.4018859863281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 360.65472412109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 362.5105285644531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 361.6860656738281
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 1 20.0 949.137050055 (10.931193889570471, 10)
loss 362.4104919433594
Current State,action,reward,Response time,Next State:  (10, 10.931193889570471) 3 19.0 957.495664348 (10.816918347608043, 11)
loss 359.713623046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 358.871337890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 361.3052062988281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 361.19952392578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 359.1305847167969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 361.23291015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 362.65692138671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 360.95489501953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 363.4336242675781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 359.4183349609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 361.91180419921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 361.3273010253906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 361.1293640136719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 362.210693359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 361.7202453613281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 361.2074890136719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 364.2680969238281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 358.9992980957031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 362.4072570800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 362.85009765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 361.8138427734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 361.06890869140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 359.6681823730469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 359.7856140136719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 362.0969543457031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 360.1842041015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 359.3755798339844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 359.8370666503906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 360.94903564453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 360.78729248046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 360.7143859863281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 359.6551208496094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 362.4629211425781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 359.8388977050781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 359.5574645996094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 360.52801513671875
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 1 20.0 945.391786838 (11.039747673816453, 10)
loss 360.64862060546875
Current State,action,reward,Response time,Next State:  (10, 11.039747673816453) 3 19.0 963.253801267 (11.271571944085663, 11)
loss 362.2864990234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 360.3459167480469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 362.2982482910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 361.0732116699219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 361.30401611328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 361.6135559082031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 361.57958984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 361.2783508300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 360.53955078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 359.18878173828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 361.5670471191406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 360.3111572265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 360.4875183105469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 361.16796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 361.3503723144531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 360.5965270996094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 360.0294494628906
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 0 21.0 1339.64749699 (18.671267839956315, 9)
loss 360.50360107421875
Current State,action,reward,Response time,Next State:  (9, 18.671267839956315) 3 20.0 1400.46626871 (19.02839494033929, 10)
loss 359.8768310546875
Current State,action,reward,Response time,Next State:  (10, 19.02839494033929) 3 19.0 1387.00434183 (19.286321916040979, 11)
loss 360.1709289550781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 361.5655822753906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 361.6018371582031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 360.0946960449219
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 0 21.0 1379.54110953 (19.385636054792762, 9)
loss 362.4810485839844
Current State,action,reward,Response time,Next State:  (9, 19.385636054792762) 3 20.0 1437.85991935 (19.223969507401588, 10)
loss 361.5252685546875
Current State,action,reward,Response time,Next State:  (10, 19.223969507401588) 2 20.0 1397.37841716 (19.25591252280865, 10)
loss 359.8366394042969
Current State,action,reward,Response time,Next State:  (10, 19.25591252280865) 2 20.0 1399.0728054 (19.08360399753829, 10)
loss 359.097412109375
Current State,action,reward,Response time,Next State:  (10, 19.08360399753829) 3 19.0 1389.93285614 (18.668181536495972, 11)
loss 361.7781982421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 360.3822326660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 362.7104797363281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 359.2808532714844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 359.647216796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 359.8263244628906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 362.5780944824219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 363.0311584472656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 363.8238220214844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 359.49822998046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 361.6507263183594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 361.90191650390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 361.94293212890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 360.6191101074219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 359.2451171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 361.30364990234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 359.6504821777344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 361.8577880859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 360.4373779296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 360.72064208984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 360.02606201171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 361.7424621582031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 361.2427673339844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 360.2049560546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 361.75048828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 361.8586730957031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 361.69110107421875
############ Running episode number: 274  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 359.8691101074219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 360.9996337890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 360.9023132324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 361.04571533203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 362.3122253417969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 361.8202209472656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 361.8117980957031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 359.3919677734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 360.1864013671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 362.25762939453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 361.25897216796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 360.1895751953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 360.65283203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 360.2734069824219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 360.478271484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 358.55963134765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 360.78216552734375
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 4 19.0 925.789969445 (10.489125480251131, 11)
loss 362.15667724609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 359.71868896484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 361.8263244628906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 359.8436584472656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 362.6213073730469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 360.5243225097656
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 1 20.0 919.104778396 (10.388469398680568, 10)
loss 360.45428466796875
Current State,action,reward,Response time,Next State:  (10, 10.388469398680568) 3 19.0 928.707336523 (10.344006106602812, 11)
loss 362.2762451171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 362.3634948730469
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 2 19.0 913.381595845 (10.30224719189987, 11)
loss 359.89642333984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 361.3857727050781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 363.4012756347656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 360.39776611328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 361.8898620605469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 362.3093566894531
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 2 19.0 909.642131904 (10.276491935146446, 11)
loss 361.10693359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 362.26708984375
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 0 21.0 909.046654676 (10.236272697871373, 9)
loss 362.4874267578125
Current State,action,reward,Response time,Next State:  (9, 10.236272697871373) 3 20.0 958.935899728 (10.369891240151098, 10)
loss 361.7208251953125
Current State,action,reward,Response time,Next State:  (10, 10.369891240151098) 3 19.0 927.721874973 (10.316955310454549, 11)
loss 362.5916442871094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 360.600830078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 360.5693664550781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 361.0435791015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 360.55810546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 360.40185546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 362.50457763671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 360.92596435546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 361.50787353515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 361.07940673828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 362.83111572265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 359.86383056640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 359.3258056640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 361.92218017578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 359.4503173828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 362.3477478027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 361.6629943847656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 361.0218811035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 360.1312561035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 361.4145812988281
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 4 19.0 1179.43847566 (15.836943704090487, 11)
loss 359.7687072753906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 359.3895568847656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 360.2635498046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 359.5174865722656
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 1 20.0 1294.6873044 (17.669285735563751, 10)
loss 360.3532409667969
Current State,action,reward,Response time,Next State:  (10, 17.669285735563751) 3 19.0 1314.91162813 (17.944480812078613, 11)
loss 360.1572265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 362.05047607421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 360.75958251953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 360.7860107421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 361.573486328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 359.5225524902344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 361.13507080078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 361.28472900390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 360.23944091796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 359.40240478515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 360.6823425292969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 359.9195251464844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 361.2625427246094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 359.7164001464844
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 4 19.0 1339.12370397 (17.82724819986867, 11)
loss 360.4371032714844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 361.390869140625
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 4 19.0 1278.56065924 (16.84211602880065, 11)
loss 361.48516845703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 361.4183044433594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 360.1838684082031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 359.1744689941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 361.9432067871094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 360.7654724121094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 361.4187927246094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 360.8385925292969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 360.172119140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 360.6878356933594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 360.789306640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 360.1123352050781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 360.92730712890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 358.8232116699219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 360.00311279296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 360.2531433105469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 360.2210388183594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 360.9192199707031
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 4 19.0 1229.17115431 (16.667936385136993, 11)
loss 362.8811950683594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 360.4256591796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 361.90472412109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 361.29193115234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 365.171142578125
############ Running episode number: 275  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 361.116943359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 362.1648254394531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 360.4420471191406
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 2 19.0 982.049698353 (11.469111876584304, 11)
loss 361.6890563964844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 361.6138610839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 361.5046081542969
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 1 20.0 962.898956888 (11.027107764209074, 10)
loss 360.6568298339844
Current State,action,reward,Response time,Next State:  (10, 11.027107764209074) 3 19.0 962.583328739 (10.995673623987257, 11)
loss 360.6922912597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 362.3168640136719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 362.05487060546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 360.01409912109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 362.9130859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 360.3597106933594
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 4 19.0 937.318160694 (10.644925616761762, 11)
loss 360.7728576660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 361.10882568359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 359.41741943359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 359.99578857421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 360.75946044921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 360.7318115234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 362.29595947265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 359.41693115234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 362.2977294921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 359.69549560546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 360.9462890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 363.32611083984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 360.4187316894531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 362.3453063964844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 361.151611328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 360.2237854003906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 362.4306335449219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 362.46197509765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 362.8951110839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 361.51861572265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 361.07855224609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 362.1854248046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 360.9186096191406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 361.9629821777344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 361.4955749511719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 361.69049072265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 360.3703308105469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 361.85687255859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 360.132080078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 362.9014892578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 360.6774597167969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 359.7882995605469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 360.54730224609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 361.8664855957031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 363.3301086425781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 359.8962097167969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 362.95269775390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 359.8046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 360.6976318359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 361.56317138671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 360.376953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 361.3070983886719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 361.71246337890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 360.028076171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 363.1494140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 361.46636962890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 360.8918762207031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 359.37286376953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 360.19403076171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 359.7381286621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 360.9467468261719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 361.493896484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 359.42333984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 360.5533142089844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 360.3443908691406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 362.5308532714844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 359.7639465332031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 361.3152770996094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 360.7803649902344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 360.1114807128906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 361.4845886230469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 359.87213134765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 361.86993408203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 359.44342041015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 359.47003173828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 359.9021911621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 359.6890563964844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 361.6413879394531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 359.7008361816406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 360.15496826171875
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 4 19.0 1184.33851965 (15.750501603468638, 11)
loss 359.675048828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 361.5487060546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 361.737060546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 362.5310363769531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 361.64044189453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 359.9965515136719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 360.12445068359375
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 2 19.0 1214.51137704 (15.947547279389703, 11)
loss 358.8862609863281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 362.36376953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 361.6495056152344
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 1 20.0 1221.34827555 (16.229253414601111, 10)
loss 363.1651611328125
Current State,action,reward,Response time,Next State:  (10, 16.229253414601111) 3 19.0 1238.52642122 (16.295120821876548, 11)
loss 359.9360046386719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 360.2846374511719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 362.49957275390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 362.51605224609375
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 1 20.0 1258.27117243 (17.052961248403161, 10)
loss 360.4753112792969
Current State,action,reward,Response time,Next State:  (10, 17.052961248403161) 3 19.0 1282.21925533 (17.215992726625572, 11)
loss 360.24755859375
############ Running episode number: 276  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 360.0345764160156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 361.41107177734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 359.7518310546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 362.1580505371094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 359.3246154785156
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 4 19.0 967.160346038 (11.25610796929319, 11)
loss 359.5546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 361.4162292480469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 362.012451171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 360.1829833984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 361.0037841796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 362.0491027832031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 361.505859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 360.8123779296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 359.83160400390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 363.20849609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 361.98492431640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 359.3913879394531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 361.7840576171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 361.39923095703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 360.4317932128906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 360.66973876953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 360.6874084472656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 359.59033203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 362.27667236328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 360.3052062988281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 361.95587158203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 361.65374755859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 362.9156188964844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 360.7181701660156
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 2 19.0 910.69972028 (10.335411397720526, 11)
loss 361.9231262207031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 359.88824462890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 359.15325927734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 360.908935546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 358.9979248046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 360.10968017578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 360.9339904785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 361.4649658203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 360.1470947265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 359.8787841796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 360.37933349609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 359.6885070800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 359.7747497558594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 360.1382751464844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 359.64306640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 359.6234436035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 360.8959045410156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 361.89910888671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 361.1946716308594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 360.4883117675781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 359.4427795410156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 360.1817321777344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 361.77008056640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 360.563232421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 361.684814453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 361.44427490234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 361.1410827636719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 360.4842834472656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 361.8762512207031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 359.86517333984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 360.9794006347656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 363.17034912109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 359.1771240234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 360.2347412109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 363.01226806640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 361.55853271484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 361.13165283203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 360.2109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 360.76300048828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 359.9729919433594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 359.9268798828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 363.30712890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 359.989990234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 362.9885559082031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 361.55743408203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 360.81884765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 360.69647216796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 359.7840270996094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 362.2392578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 360.5571594238281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 360.54296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 361.7804260253906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 360.66204833984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 362.06689453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 359.6492004394531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 361.3642883300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 359.9747314453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 360.9627990722656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.88909912109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 361.17840576171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 362.3416442871094
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 4 19.0 1214.51137704 (15.947547279389703, 11)
loss 359.5878601074219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 359.4272155761719
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 0 21.0 1219.63501821 (16.147078378791146, 9)
loss 360.7035827636719
Current State,action,reward,Response time,Next State:  (9, 16.147078378791146) 3 20.0 1268.3374073 (16.229253414601111, 10)
loss 362.0807189941406
Current State,action,reward,Response time,Next State:  (10, 16.229253414601111) 3 19.0 1238.52642122 (16.295120821876548, 11)
loss 360.4801025390625
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 4 19.0 1229.17115431 (16.667936385136993, 11)
loss 360.61395263671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 360.1643371582031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 360.9361267089844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 359.8758544921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 360.32403564453125
############ Running episode number: 277  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 361.1422424316406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 360.604736328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 360.265380859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 361.41436767578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 361.41046142578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 360.6348876953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 362.3416748046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 361.91375732421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 359.1810302734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 360.4598388671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 361.2439880371094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 359.552734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 359.5615539550781
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 1 20.0 937.318160694 (10.644925616761762, 10)
loss 361.88531494140625
Current State,action,reward,Response time,Next State:  (10, 10.644925616761762) 3 19.0 942.310823749 (10.58735855349979, 11)
loss 361.4378662109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 359.47259521484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 361.023681640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 358.90924072265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 359.7080993652344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 363.2560729980469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 359.4035949707031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 359.5616760253906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 361.05181884765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 361.65802001953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 359.4936828613281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 359.93341064453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 359.9314270019531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 360.2756042480469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 360.12628173828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 361.0662536621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 359.12860107421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 361.4912414550781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 359.60943603515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 360.70098876953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 362.459716796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 360.9860534667969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 359.98980712890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 362.0893249511719
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 0 21.0 914.152581784 (10.390165524255663, 9)
loss 360.0614013671875
Current State,action,reward,Response time,Next State:  (9, 10.390165524255663) 3 20.0 966.991429728 (10.425974763084863, 10)
loss 363.1102294921875
Current State,action,reward,Response time,Next State:  (10, 10.425974763084863) 3 19.0 930.696774523 (10.546025383098053, 11)
loss 360.4668273925781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 364.52447509765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 359.47320556640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 361.9482421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 361.2945556640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 361.2722473144531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 360.63433837890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 360.5438232421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 359.5633850097656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 362.84716796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 361.4119567871094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 360.40106201171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 361.1318664550781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 359.7935791015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 360.536865234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 361.33502197265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 361.9679870605469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 360.9930725097656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 361.90478515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 360.2286376953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 359.01220703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 362.25323486328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 362.50701904296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 360.3362121582031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 359.6136474609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 360.76019287109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 360.903076171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 359.6855773925781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 362.4732971191406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 359.819580078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 360.58123779296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 360.1504211425781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 361.55474853515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 358.9678649902344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 360.81396484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 359.5015869140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 361.3573303222656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 361.29217529296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 361.1422119140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 361.9853515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 361.5557556152344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 363.5598449707031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 360.5822448730469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 360.48895263671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 360.1061096191406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 360.07586669921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 361.3041076660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 360.2728576660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 360.38916015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 360.2806091308594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 360.7379455566406
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 4 19.0 1210.80462626 (16.11465619633363, 11)
loss 360.2104187011719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 360.1241760253906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 359.4862365722656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 360.9251403808594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 360.68182373046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 360.2774658203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 360.9769592285156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 360.1292724609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 360.17303466796875
############ Running episode number: 278  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 362.317626953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 361.3504333496094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 360.9778137207031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 360.84185791015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 361.1565856933594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 362.0388488769531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 361.478271484375
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 1 20.0 950.798097136 (10.995673623987257, 10)
loss 360.28192138671875
Current State,action,reward,Response time,Next State:  (10, 10.995673623987257) 1 21.0 960.915933313 (10.931193889570471, 9)
loss 360.2544860839844
Current State,action,reward,Response time,Next State:  (9, 10.931193889570471) 3 20.0 995.311594677 (10.816918347608043, 10)
loss 362.8614501953125
Current State,action,reward,Response time,Next State:  (10, 10.816918347608043) 3 19.0 951.434021987 (10.819208572963639, 11)
loss 360.7388000488281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 360.4640808105469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 360.3332214355469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 361.3702392578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 360.4285888671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 360.85491943359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 361.60076904296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 361.7598876953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 359.4672546386719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 361.24713134765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 359.6660461425781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 360.2532958984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 361.2549743652344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 361.08441162109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 359.0611572265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 359.851318359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 359.45355224609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 360.3873596191406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 360.9728088378906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 358.89886474609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 363.0408935546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 360.7376708984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 361.36541748046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 360.7326965332031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 359.32513427734375
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 2 19.0 909.008683798 (10.369891240151098, 11)
loss 361.05474853515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 361.8890380859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 360.278076171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 361.0185241699219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 359.4547424316406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 361.6158142089844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 358.99835205078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 360.654296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 359.96026611328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 361.3265075683594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 360.13861083984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 359.7965087890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 359.5633544921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 362.7485656738281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 360.53485107421875
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 1 20.0 1012.73322757 (12.501496275411796, 10)
loss 359.70465087890625
Current State,action,reward,Response time,Next State:  (10, 12.501496275411796) 3 19.0 1040.79092857 (13.168618569876575, 11)
loss 360.9246826171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 359.5602111816406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 360.87103271484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 361.4012145996094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 359.01617431640625
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 0 21.0 1179.43847566 (15.836943704090487, 9)
loss 360.5185852050781
Current State,action,reward,Response time,Next State:  (9, 15.836943704090487) 3 20.0 1252.10338759 (16.466876895473597, 10)
loss 360.2911071777344
Current State,action,reward,Response time,Next State:  (10, 16.466876895473597) 3 19.0 1251.130943 (16.871606159345866, 11)
loss 361.2535705566406
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 4 19.0 1259.63387034 (17.534967586021782, 11)
loss 360.3927917480469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 359.8494567871094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 361.146728515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 360.1275939941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 361.0500183105469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 361.0307922363281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 360.5976257324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 361.9337463378906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 362.01226806640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 360.1690368652344
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 4 19.0 1379.54110953 (19.385636054792762, 11)
loss 363.5274658203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 364.05224609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 359.713134765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 359.3520202636719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 360.8352355957031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 359.89300537109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 360.124755859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 362.032958984375
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 1 20.0 1278.56065924 (16.84211602880065, 10)
loss 360.5397644042969
Current State,action,reward,Response time,Next State:  (10, 16.84211602880065) 1 21.0 1271.03516211 (16.237094554670044, 9)
loss 361.3667907714844
Current State,action,reward,Response time,Next State:  (9, 16.237094554670044) 3 20.0 1273.04930988 (15.950694610794756, 10)
loss 362.0191650390625
Current State,action,reward,Response time,Next State:  (10, 15.950694610794756) 3 19.0 1223.7505224 (15.828704162850809, 11)
loss 358.694580078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 361.53765869140625
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 2 19.0 1189.84140354 (15.446694946204717, 11)
loss 360.71575927734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 360.1765441894531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 361.13641357421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 360.86798095703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 359.69140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 360.1776123046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 359.2321472167969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 361.12750244140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 359.8558654785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 361.4039306640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 361.0577087402344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 363.9085693359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 360.8209228515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 360.3868103027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 360.7915344238281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 360.5032043457031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 360.88543701171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 361.3130798339844
############ Running episode number: 279  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 362.62188720703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 359.54571533203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 359.84722900390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 362.0477294921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 361.4951171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 360.4827575683594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 361.10504150390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 361.55133056640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 362.6420593261719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 362.4674072265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 360.83197021484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 360.138916015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 360.9385681152344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 363.4439697265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 361.572265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 360.0341796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 361.9287414550781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 359.41192626953125
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 4 19.0 922.369964659 (10.448897752470936, 11)
loss 360.5865478515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 359.66912841796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 362.1533508300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 360.54815673828125
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 0 21.0 916.124940439 (10.42733414151318, 9)
loss 362.2781982421875
Current State,action,reward,Response time,Next State:  (9, 10.42733414151318) 3 20.0 968.937023414 (10.388469398680568, 10)
loss 362.1006164550781
Current State,action,reward,Response time,Next State:  (10, 10.388469398680568) 3 19.0 928.707336523 (10.344006106602812, 11)
loss 361.4903869628906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 363.3921813964844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 359.9401550292969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 361.44134521484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 360.1861572265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 362.2986755371094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 359.4774169921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 359.2584228515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 362.5384521484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 361.6516418457031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 360.3636474609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 360.5038146972656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 361.729736328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 361.7574768066406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 360.2574157714844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 358.9656982421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 359.8552551269531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 359.2076416015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 361.8522033691406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 362.0101013183594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 361.6070251464844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 362.0531921386719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 360.8446350097656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 362.1366271972656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 361.2616271972656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 359.9817810058594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 361.310546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 360.8670959472656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 359.3318176269531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 361.4029541015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 360.6600036621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 362.06915283203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 361.6479187011719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 360.925537109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 362.41461181640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 361.34698486328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 362.23236083984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 361.95355224609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 360.69683837890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 360.1073913574219
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 0 21.0 1354.73183582 (19.02839494033929, 9)
loss 363.5484924316406
Current State,action,reward,Response time,Next State:  (9, 19.02839494033929) 3 20.0 1419.16011 (19.286321916040979, 10)
loss 360.5914611816406
Current State,action,reward,Response time,Next State:  (10, 19.286321916040979) 3 19.0 1400.68584406 (19.340464848017284, 11)
loss 361.2228088378906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 359.7455139160156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 362.21173095703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 361.9743957519531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 363.66961669921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 360.51531982421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 362.3968200683594
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 4 19.0 1376.52055872 (18.668181536495972, 11)
loss 361.7405700683594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 360.88922119140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 361.0368347167969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 360.59259033203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 359.4784851074219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 360.5103454589844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 360.1637878417969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 360.2262878417969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 363.9494934082031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 360.80181884765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 359.8287353515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 359.9051208496094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 359.365966796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 360.5316467285156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 362.255615234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 366.7055358886719
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 4 19.0 1213.81868812 (16.017694914042416, 11)
loss 359.8828430175781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 360.4523010253906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 361.7083740234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 361.79132080078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 359.80413818359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 360.4027099609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 361.341552734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 360.9627380371094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 361.3435363769531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 360.4397888183594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 361.155029296875
############ Running episode number: 280  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 360.33148193359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 362.3447265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 360.7803955078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 360.7013854980469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 361.9495849609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 360.5600280761719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 360.2530517578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 359.9785461425781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 361.0109558105469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 361.2690124511719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 360.85205078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 361.27099609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 360.31475830078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 360.38720703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 360.52349853515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 359.6487731933594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 360.2950439453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 362.0730895996094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 361.9964294433594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 360.5856628417969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 360.10638427734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 363.40997314453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 362.6025390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 361.3041076660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 360.26544189453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 360.9307861328125
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 1 20.0 913.381595845 (10.30224719189987, 10)
loss 361.8931579589844
Current State,action,reward,Response time,Next State:  (10, 10.30224719189987) 0 22.0 924.133757854 (10.278181486298042, 8)
loss 362.56536865234375
Current State,action,reward,Response time,Next State:  (8, 10.278181486298042) 3 21.0 971.749689939 (10.268274366284802, 9)
loss 363.2354431152344
Current State,action,reward,Response time,Next State:  (9, 10.268274366284802) 3 20.0 960.611029141 (10.335411397720526, 10)
loss 361.6827392578125
Current State,action,reward,Response time,Next State:  (10, 10.335411397720526) 3 19.0 925.892923039 (10.305649118067803, 11)
loss 361.70330810546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 361.5118408203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 360.8411865234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 362.24658203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 361.4926452636719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 360.90875244140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 361.9446716308594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 361.4477844238281
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 2 19.0 914.152581784 (10.390165524255663, 11)
loss 362.8861083984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 361.0690002441406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 360.41680908203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 361.8882141113281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 361.94073486328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 360.93255615234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 359.97088623046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 360.01434326171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 362.01507568359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 360.0815124511719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 359.1981201171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 360.898193359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 360.094482421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 362.9739074707031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 360.887939453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 362.2486267089844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 361.1275634765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 362.3922119140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 361.0923767089844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 360.47222900390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 360.6275634765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 361.15032958984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 360.4436950683594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 360.15753173828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 362.7181091308594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 359.2344665527344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 363.0310974121094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 360.8465881347656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 362.7082214355469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 360.9486999511719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 361.87445068359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 365.2370300292969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 360.39508056640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 361.05804443359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 362.18682861328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 361.08245849609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 363.08087158203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 360.8703308105469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 360.37078857421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 360.8639831542969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 362.98260498046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 361.086669921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 359.98089599609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 359.87054443359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 360.4659729003906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 359.90338134765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 361.70770263671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 359.5893249511719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 360.9831848144531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 362.25933837890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 360.5134582519531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 362.70703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 360.9146423339844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 361.9662780761719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 360.2911376953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 361.2786865234375
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 0 21.0 1225.69057988 (16.295120821876548, 9)
loss 362.3299255371094
Current State,action,reward,Response time,Next State:  (9, 16.295120821876548) 3 20.0 1276.0866986 (16.667936385136993, 10)
loss 361.0064697265625
Current State,action,reward,Response time,Next State:  (10, 16.667936385136993) 3 19.0 1261.79596106 (16.836383524612351, 11)
loss 361.7593688964844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 360.5107421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 361.5190734863281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 361.1283264160156
############ Running episode number: 281  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 361.1448059082031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 360.3473205566406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 358.99432373046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 361.5103454589844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 361.9655456542969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 359.27801513671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 359.914306640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 360.5120544433594
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 2 19.0 949.137050055 (10.931193889570471, 11)
loss 360.23663330078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 363.01263427734375
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 4 19.0 939.691239608 (10.819208572963639, 11)
loss 362.3113098144531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 359.8222961425781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 360.5016174316406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 360.2179260253906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 360.2584533691406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 360.47674560546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 360.85455322265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 359.4437255859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 358.8828430175781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 360.4625549316406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 360.669677734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 359.55255126953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 362.25384521484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 361.8259582519531
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 2 19.0 917.051082408 (10.344006106602812, 11)
loss 359.6279602050781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 360.1146545410156
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 4 19.0 913.381595845 (10.30224719189987, 11)
loss 360.8470153808594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 361.54595947265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 361.46197509765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 362.5570373535156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 361.3623046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 360.9270935058594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 359.7340087890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 362.2204895019531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 359.2031555175781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 360.7388916015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 359.67156982421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 361.1337890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 360.0174255371094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 360.49835205078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 360.31427001953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 360.32403564453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 360.5701599121094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 359.6009826660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 360.59698486328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 360.65118408203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 363.4408874511719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 362.7272033691406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 360.1961364746094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 359.5954284667969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 359.61236572265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 360.4547119140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 359.2688903808594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 361.0782775878906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 360.3734436035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 360.92156982421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 359.6905212402344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 360.222412109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 359.6774597167969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 360.1119689941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 360.55169677734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 360.40008544921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 359.78656005859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 359.81378173828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 361.3160705566406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 361.3599548339844
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 2 19.0 1387.23260634 (19.340464848017284, 11)
loss 361.6679382324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 359.01239013671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 359.98309326171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 360.4717102050781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 361.97772216796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 362.54815673828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 359.1828918457031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 359.7758483886719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 359.7099609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 360.9996032714844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 359.22723388671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 362.03533935546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 362.0881652832031
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 2 19.0 1226.10492247 (15.950694610794756, 11)
loss 360.6199035644531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 361.51263427734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 359.39202880859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 361.7306823730469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 361.5424499511719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 360.7004699707031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 361.1358947753906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 361.2061462402344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 361.5367736816406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 362.7713317871094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 360.0686340332031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 362.20648193359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 360.5323791503906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 361.930419921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 361.16314697265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 361.52911376953125
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 1 20.0 1229.17115431 (16.667936385136993, 10)
loss 363.8509521484375
Current State,action,reward,Response time,Next State:  (10, 16.667936385136993) 3 19.0 1261.79596106 (16.836383524612351, 11)
loss 360.0272216796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 360.5110778808594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 362.0688781738281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 359.4163513183594
############ Running episode number: 282  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 360.3992004394531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 361.1588439941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 359.1553955078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 359.3558654785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 360.1936950683594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 360.3413391113281
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 0 21.0 962.898956888 (11.027107764209074, 9)
loss 360.4013977050781
Current State,action,reward,Response time,Next State:  (9, 11.027107764209074) 3 20.0 1000.33221268 (10.995673623987257, 10)
loss 361.11492919921875
Current State,action,reward,Response time,Next State:  (10, 10.995673623987257) 3 19.0 960.915933313 (10.931193889570471, 11)
loss 360.60601806640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 359.2906188964844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 359.5281066894531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 359.39593505859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 361.2773742675781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 362.19488525390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 359.6463928222656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 361.8297424316406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 361.140625
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 4 19.0 925.789969445 (10.489125480251131, 11)
loss 362.4458312988281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 360.7142028808594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 360.8616027832031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 360.8714904785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 359.33758544921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 363.84747314453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 361.15338134765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 360.68115234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 361.5150451660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 361.3080749511719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 360.733642578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 360.4871520996094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 361.1290283203125
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 0 21.0 914.247384359 (10.305649118067803, 9)
loss 359.94219970703125
Current State,action,reward,Response time,Next State:  (9, 10.305649118067803) 3 20.0 962.567412952 (10.24826025489064, 10)
loss 360.5012512207031
Current State,action,reward,Response time,Next State:  (10, 10.24826025489064) 3 19.0 921.2700698 (10.276491935146446, 11)
loss 361.0281982421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 360.72802734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 358.6919250488281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 360.3587341308594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 361.14453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 359.31219482421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 359.76129150390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 361.83905029296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 362.1498107910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 360.1836853027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 362.51654052734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 361.3428955078125
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 2 19.0 937.284736847 (10.924797168745895, 11)
loss 360.55877685546875
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 1 20.0 945.391786838 (11.039747673816453, 10)
loss 360.439697265625
Current State,action,reward,Response time,Next State:  (10, 11.039747673816453) 3 19.0 963.253801267 (11.271571944085663, 11)
loss 361.5516662597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 360.1044006347656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 361.0123596191406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 360.5483093261719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 360.82147216796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 360.7656555175781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 359.9256286621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 361.4000549316406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 359.9931335449219
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 4 19.0 1143.69153516 (15.353965082180355, 11)
loss 361.8386535644531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 363.8263244628906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 360.47711181640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 360.1794128417969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 361.41729736328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 364.91265869140625
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 0 21.0 1301.78496219 (17.944480812078613, 9)
loss 360.321044921875
Current State,action,reward,Response time,Next State:  (9, 17.944480812078613) 3 20.0 1362.4225545 (18.385807405229915, 10)
loss 362.07415771484375
Current State,action,reward,Response time,Next State:  (10, 18.385807405229915) 3 19.0 1352.9188695 (18.671267839956315, 11)
loss 362.3390808105469
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 4 19.0 1354.73183582 (19.02839494033929, 11)
loss 361.6145935058594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 362.17999267578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 360.805908203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 363.2065124511719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 360.94775390625
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 0 21.0 1379.54110953 (19.385636054792762, 9)
loss 360.3533020019531
Current State,action,reward,Response time,Next State:  (9, 19.385636054792762) 3 20.0 1437.85991935 (19.223969507401588, 10)
loss 361.54742431640625
Current State,action,reward,Response time,Next State:  (10, 19.223969507401588) 3 19.0 1397.37841716 (19.25591252280865, 11)
loss 361.741943359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 362.03631591796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 360.2012023925781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 361.4580078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 360.42694091796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 360.5929870605469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 360.0773620605469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 360.91729736328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 362.3949890136719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 360.3267517089844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 363.1021423339844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 363.3641052246094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 361.58099365234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 360.38720703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 360.55712890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 362.4441833496094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 360.37261962890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 360.4053039550781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 359.5416564941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 359.590087890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 360.5755310058594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 360.64404296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 361.6260681152344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 361.1915588378906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 361.7757263183594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 362.1645812988281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 361.203857421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 361.40045166015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 360.1802062988281
############ Running episode number: 283  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 361.50885009765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 360.4710693359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 359.2577209472656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 363.03363037109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 360.5289611816406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 362.8021240234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 361.9419860839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 361.3135070800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 359.82427978515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 364.13323974609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 362.2546691894531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 361.0455017089844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 360.8504638671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 359.68548583984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 361.6887512207031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 360.7966003417969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 359.6145324707031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 361.478515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 360.4953918457031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 362.1562194824219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 359.74383544921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 362.2015075683594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 361.4347229003906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 361.5009460449219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 359.8065185546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 363.5814514160156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 360.9300842285156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 361.5008239746094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 361.634033203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 360.1798400878906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 360.4074401855469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 362.2742919921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 360.5877990722656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 360.5615234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 363.026611328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 361.2360534667969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 360.6666259765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 360.86871337890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 361.98590087890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 360.0449523925781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 362.1431579589844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 360.3563232421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 362.2655029296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 360.7467041015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 362.98223876953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 360.98968505859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 361.5224609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 362.25341796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 360.79351806640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 360.2490234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 359.7110595703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 359.6054382324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 360.93072509765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 360.5301818847656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 363.6504211425781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 363.7872314453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 362.4598083496094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 360.7879333496094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 360.2724304199219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 360.7340393066406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 363.12548828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 361.4815673828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 362.89398193359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 360.9654541015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 362.1134948730469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 360.1768798828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 363.6055603027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 361.2870178222656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 362.1774597167969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 361.6403503417969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 362.028076171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 360.0285949707031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 359.335693359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 362.1671142578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 360.2400817871094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 360.15106201171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 362.89593505859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 362.4290466308594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 362.7862548828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 361.072998046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 359.9481506347656
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 1 20.0 1204.52470225 (15.550833128512703, 10)
loss 360.5391845703125
Current State,action,reward,Response time,Next State:  (10, 15.550833128512703) 3 19.0 1202.54023315 (15.446694946204717, 11)
loss 361.180908203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 361.0507507324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 360.19683837890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 362.9907531738281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 360.399658203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 363.11175537109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 360.0494079589844
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 4 19.0 1213.81868812 (16.017694914042416, 11)
loss 360.20635986328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 360.9706115722656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 359.6371154785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 361.71929931640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 360.4258117675781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 364.8755798339844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 359.3726501464844
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 2 19.0 1248.87152463 (16.836383524612351, 11)
loss 361.2333679199219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 359.20623779296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 360.8900451660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 360.997314453125
############ Running episode number: 284  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 359.5370788574219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 361.13458251953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 359.6976623535156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 360.8912658691406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 360.1634826660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 360.9638366699219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 363.11663818359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 360.9459228515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 362.71771240234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 360.37567138671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 360.42138671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 360.9935302734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 359.3138122558594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 359.97698974609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 359.87335205078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 360.5218811035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 361.7422180175781
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 0 21.0 925.789969445 (10.489125480251131, 9)
loss 362.0926818847656
Current State,action,reward,Response time,Next State:  (9, 10.489125480251131) 3 20.0 972.171495057 (10.448897752470936, 10)
loss 359.89599609375
Current State,action,reward,Response time,Next State:  (10, 10.448897752470936) 3 19.0 931.912703681 (10.433149880183072, 11)
loss 362.4949951171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 361.1971435546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 360.22576904296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 360.0564270019531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 361.41619873046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 362.7323913574219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 362.1665954589844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 361.6275329589844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 361.92645263671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 362.917724609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 361.3683776855469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 361.579345703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 362.1477966308594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 361.2499084472656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 358.7454833984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 361.3372802734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 360.0574951171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 360.35479736328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 361.1381530761719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 359.7433166503906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 362.8689880371094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 360.2461853027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 359.9636535644531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 360.9289245605469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 358.9236145019531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 360.1059265136719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 360.929443359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 361.2237548828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 360.816650390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 360.6935729980469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 361.3734436035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 361.42022705078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 362.8403625488281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 360.13031005859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 359.58624267578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 361.1557312011719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 360.7066345214844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 362.130859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 359.56585693359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 361.9590759277344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 361.9468994140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 361.1914367675781
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 1 20.0 1301.78496219 (17.944480812078613, 10)
loss 363.51806640625
Current State,action,reward,Response time,Next State:  (10, 17.944480812078613) 3 19.0 1329.50910109 (18.385807405229915, 11)
loss 359.88031005859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 363.5963439941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 359.77593994140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 362.3330993652344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 361.1664123535156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 359.95977783203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 360.49163818359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 359.5137939453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 360.3351745605469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 362.8128356933594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 359.4320983886719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 360.32830810546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 362.2247619628906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 361.3495788574219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 362.2680358886719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 363.94122314453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 361.48919677734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 362.1498107910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 362.3902587890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 362.6823425292969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 362.6741027832031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 361.10943603515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 360.6558532714844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 362.5713195800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 361.7540588378906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 362.71533203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 359.2036437988281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 361.1275329589844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 361.3050537109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 360.699462890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 359.1174621582031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 361.3154296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 360.6090393066406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 361.9463195800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 361.5226745605469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 361.31390380859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 361.3545837402344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 360.5958251953125
############ Running episode number: 285  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 361.3988037109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 362.2056884765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 362.19219970703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 364.79412841796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 360.78033447265625
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 2 19.0 967.160346038 (11.25610796929319, 11)
loss 362.58905029296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 361.91998291015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 359.0791015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 359.1506652832031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 361.533203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 362.05157470703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 361.849609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 360.0768737792969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 359.48138427734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 360.0247802734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 359.8999938964844
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 1 20.0 925.738299342 (10.553846649940214, 10)
loss 361.59954833984375
Current State,action,reward,Response time,Next State:  (10, 10.553846649940214) 3 19.0 937.479622653 (10.489125480251131, 11)
loss 359.40380859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 360.5639953613281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 362.8199157714844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 360.577392578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 360.746337890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 361.6133117675781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 359.47344970703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 362.270751953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 362.18475341796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 361.2257080078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 360.93743896484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 362.0943603515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 360.4405822753906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 360.529296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 360.85064697265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 360.40472412109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 361.4879150390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 360.2745361328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 360.6785583496094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 361.1551513671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 362.16192626953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 361.1921691894531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 361.7342834472656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 359.6417236328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 361.8083190917969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 361.2818603515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 359.9300537109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 361.12646484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 359.66131591796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 359.8262634277344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 359.8031005859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 361.9483947753906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 359.9693603515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 360.92462158203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 359.8762512207031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 363.4105529785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 359.37030029296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 358.91461181640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 361.1300964355469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 360.015380859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 362.2640686035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 359.13720703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 362.789306640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 362.76416015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 359.4443054199219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 359.8488464355469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 360.2820739746094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 361.0220642089844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 362.0088806152344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 360.43511962890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 360.10400390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 361.05145263671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 361.2354431152344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 360.99249267578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 359.17303466796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 360.4986267089844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 360.3038024902344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 360.8417663574219
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 0 21.0 1339.12370397 (17.82724819986867, 9)
loss 360.60357666015625
Current State,action,reward,Response time,Next State:  (9, 17.82724819986867) 3 20.0 1356.28600579 (17.229782241685768, 10)
loss 363.5310974121094
Current State,action,reward,Response time,Next State:  (10, 17.229782241685768) 3 19.0 1291.59856437 (16.84211602880065, 11)
loss 361.0536804199219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.75250244140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 360.7799072265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 361.8775634765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 360.3188171386719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 363.67205810546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 360.4108581542969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 360.47412109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 361.53350830078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 360.5341491699219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 361.3796081542969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 361.0848083496094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 363.7846374511719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 360.4133605957031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 361.2633361816406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 362.8913879394531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 362.0663757324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 360.21783447265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 359.374267578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 359.220458984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 363.26458740234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 361.6295166015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 361.03350830078125
############ Running episode number: 286  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 359.25146484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 359.97296142578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 361.0072021484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 362.52825927734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 361.04779052734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 363.9371337890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 360.9636535644531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 359.9703063964844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 361.7891540527344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 363.96710205078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 360.1313781738281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 359.33953857421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 360.5260314941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 359.1147766113281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 360.6507568359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 362.05401611328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 363.1724853515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 362.86553955078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 359.9310302734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 360.36236572265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 362.2294921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 361.930419921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 359.88446044921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 361.4891052246094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 361.53326416015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 362.69781494140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 360.3935852050781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 359.8586730957031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 362.056884765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 359.5457763671875
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 0 21.0 914.247384359 (10.305649118067803, 9)
loss 361.0231018066406
Current State,action,reward,Response time,Next State:  (9, 10.305649118067803) 3 20.0 962.567412952 (10.24826025489064, 10)
loss 359.4679870605469
Action +2 not possible so Scaled up by 1
Current State,action,reward,Response time,Next State:  (10, 10.24826025489064) 4 19.0 921.2700698 (10.276491935146446, 11)
loss 360.37847900390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 361.5391540527344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 361.3064270019531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 359.8851318359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 360.0762023925781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 363.0255432128906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 360.2154235839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 360.4783020019531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 360.4327087402344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 360.9468994140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 361.5741882324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 360.18408203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 360.0505676269531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 360.4066467285156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 361.02569580078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 359.0157775878906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 362.568115234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 362.9625549316406
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 1 20.0 1012.73322757 (12.501496275411796, 10)
loss 361.5858154296875
Action +2 not possible so Scaled up by 1
Current State,action,reward,Response time,Next State:  (10, 12.501496275411796) 4 19.0 1040.79092857 (13.168618569876575, 11)
loss 361.7929382324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 360.22906494140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 362.05255126953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 362.67010498046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 359.39190673828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 361.39068603515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 360.1884765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 362.6309509277344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 362.7751159667969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 359.9153747558594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 359.6092224121094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 360.71136474609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 359.4120788574219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 361.4988098144531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 360.8191833496094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 361.7714538574219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 360.366943359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 362.40203857421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 361.80865478515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 361.9891662597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 361.4959411621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 360.4081726074219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 360.7613525390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 359.0814514160156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 361.52801513671875
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 4 19.0 1310.13203606 (17.229782241685768, 11)
loss 359.6239318847656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 362.16302490234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 362.10418701171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 360.9954833984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 359.2228698730469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 362.7351379394531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 359.0877380371094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 360.0969543457031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 361.5465087890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 360.80126953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 364.410400390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 360.5956726074219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 360.15277099609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 362.830078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 360.982177734375
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 4 19.0 1210.80462626 (16.11465619633363, 11)
loss 360.8004150390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 362.344970703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 362.4910583496094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 362.4622802734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 363.1522216796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 361.1650695800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 359.781982421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 361.3194885253906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 360.3818359375
############ Running episode number: 287  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 361.87847900390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 360.20440673828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 359.2568054199219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 362.3248291015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 363.4496154785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 360.59613037109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 361.8559875488281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 361.72125244140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 363.4864807128906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 358.8579406738281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 361.54022216796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 361.9620666503906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 361.07183837890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 361.3303527832031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 362.36480712890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 361.9178466796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 359.1138610839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 361.15728759765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 359.7377624511719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 360.1084289550781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 361.0436706542969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 361.881103515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 362.44793701171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 361.10345458984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 360.5718994140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 360.891357421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 361.4438781738281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 361.2189636230469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 362.7280578613281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 359.71466064453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 360.50360107421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 361.0487976074219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 361.2965087890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 361.8976135253906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 360.4857177734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 363.482666015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 360.3730773925781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 359.7629089355469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 360.83319091796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 362.30609130859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 361.5404052734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 361.7979736328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 360.77777099609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 361.0538024902344
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 2 19.0 937.284736847 (10.924797168745895, 11)
loss 360.1107177734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 359.8363952636719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 360.618896484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 361.93780517578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 361.033935546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 360.98486328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 361.66571044921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 362.74871826171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 360.467529296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 360.4107666015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 359.8482360839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 359.83544921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 361.7065124511719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 360.8631591796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 360.37005615234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 361.1542053222656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 359.9466552734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 359.2867431640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 361.8089599609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 359.6929626464844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 361.00946044921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 359.6551208496094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 360.6668701171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 361.35064697265625
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 2 19.0 1383.38281107 (19.140765783401285, 11)
loss 361.399658203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 359.52105712890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 361.9656066894531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 359.8264465332031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 360.9308776855469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 360.20977783203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 359.891357421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 358.91656494140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 360.71539306640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 361.5920104980469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 363.5072937011719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 359.1346435546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 360.9066162109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 362.43829345703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 362.871337890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 361.35504150390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 362.11358642578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 362.6108093261719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 360.17144775390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 364.8343505859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 360.6929626464844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 358.9314880371094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 360.0735168457031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 359.7159118652344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 364.056640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 360.6142883300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 359.44598388671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 361.38226318359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 359.6155090332031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 362.5414123535156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 360.2275085449219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 360.5758056640625
############ Running episode number: 288  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 359.87469482421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 361.572998046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 360.38055419921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 360.5266418457031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 361.1694030761719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 359.36883544921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 358.7173156738281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 359.9560852050781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 361.63995361328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 359.517822265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 361.7998352050781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 360.823486328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 362.74542236328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 361.3033142089844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 361.74169921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 361.4785461425781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 361.6057434082031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 359.6138610839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 360.83392333984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 361.5200500488281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 359.6673583984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 362.1930847167969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 361.1440734863281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 359.0652770996094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 360.08929443359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 361.7641296386719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 360.53460693359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 361.4942626953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 363.17559814453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 359.4475402832031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 359.08966064453125
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 0 21.0 912.67468196 (10.24826025489064, 9)
loss 360.885498046875
Current State,action,reward,Response time,Next State:  (9, 10.24826025489064) 3 20.0 959.563389179 (10.276491935146446, 10)
loss 360.1948547363281
Current State,action,reward,Response time,Next State:  (10, 10.276491935146446) 3 19.0 922.767593645 (10.236991269871366, 11)
loss 362.2472229003906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 359.7755432128906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 360.2566223144531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 360.4024963378906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 361.13189697265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 362.31060791015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 360.4739990234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 360.30694580078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 359.1967468261719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 360.7816162109375
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 4 19.0 929.522052234 (10.771376986314287, 11)
loss 359.1092834472656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 362.15423583984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 359.4980163574219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 359.8959045410156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 361.23150634765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 359.5276184082031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 360.2353515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 362.1260986328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 362.0396423339844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 360.1509094238281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 362.5013732910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 361.4404602050781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 360.5276184082031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 361.78509521484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 359.54901123046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 361.4521484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 362.1261291503906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 360.6726989746094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 361.263427734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 359.7691955566406
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 1 20.0 1339.64749699 (18.671267839956315, 10)
loss 360.5174255371094
Current State,action,reward,Response time,Next State:  (10, 18.671267839956315) 3 19.0 1368.06085906 (19.02839494033929, 11)
loss 359.6180419921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 359.48223876953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 362.39263916015625
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 0 21.0 1390.09363446 (19.213467265587269, 9)
loss 359.860107421875
Current State,action,reward,Response time,Next State:  (9, 19.213467265587269) 3 20.0 1428.84773289 (19.140765783401285, 10)
loss 362.78143310546875
Current State,action,reward,Response time,Next State:  (10, 19.140765783401285) 3 19.0 1392.96495117 (19.385636054792762, 11)
loss 361.21240234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 361.3959655761719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 358.99859619140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 360.6059875488281
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 1 20.0 1376.52055872 (18.668181536495972, 10)
loss 362.04888916015625
Current State,action,reward,Response time,Next State:  (10, 18.668181536495972) 3 19.0 1367.89714889 (18.375894992990247, 11)
loss 363.0140686035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 361.0956115722656
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 2 19.0 1310.13203606 (17.229782241685768, 11)
loss 360.6256408691406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 361.1007995605469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 362.10955810546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 360.7479553222656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 360.6285705566406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 359.4360046386719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 360.80963134765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 360.1195983886719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 361.2099914550781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 360.9532165527344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 361.5685119628906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 360.2790222167969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 361.0221862792969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 363.2446594238281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 360.6697082519531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 360.40118408203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 360.9478759765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 361.00341796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 362.0850830078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 361.884033203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 360.6802062988281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 360.4360046386719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 360.19122314453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 361.0682373046875
############ Running episode number: 289  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 360.0123291015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 359.4861145019531
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 0 21.0 990.920419923 (11.61852219546234, 9)
loss 361.37152099609375
Current State,action,reward,Response time,Next State:  (9, 11.61852219546234) 3 20.0 1031.28983953 (11.469111876584304, 10)
loss 361.6217041015625
Current State,action,reward,Response time,Next State:  (10, 11.469111876584304) 3 19.0 986.02903554 (11.336751742492702, 11)
loss 360.7928771972656
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 2 19.0 967.160346038 (11.25610796929319, 11)
loss 361.36175537109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 360.32769775390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 361.28912353515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 360.391357421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 361.84332275390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 359.80523681640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 362.5261535644531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 361.2912902832031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 363.5671691894531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 358.9515075683594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 361.2869873046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 360.79498291015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 361.77398681640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 362.1850280761719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 360.6619567871094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 361.19317626953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 361.7985534667969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 360.5763854980469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 361.6629638671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 360.6375427246094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 360.0768127441406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 361.37359619140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 361.0224914550781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 360.57830810546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 359.9144287109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 361.0191345214844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 359.9107360839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 361.65692138671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 360.2072448730469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 361.60919189453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 359.8101806640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 360.06475830078125
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 4 19.0 913.272125304 (10.333617326102203, 11)
loss 360.78863525390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 361.7938537597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 360.9341735839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 363.30816650390625
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 4 19.0 925.376677007 (10.655373370049301, 11)
loss 360.7853088378906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 361.5320129394531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 360.2829895019531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 363.5938720703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 360.4136657714844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 360.9698791503906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 360.8020324707031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 359.6849365234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 361.25286865234375
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 4 19.0 1012.73322757 (12.501496275411796, 11)
loss 361.2703857421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 361.27935791015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 360.2555236816406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 362.41912841796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 359.8221130371094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 360.4411315917969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 360.0562438964844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 360.76678466796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 359.8765869140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 360.19842529296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 359.8393859863281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 360.88800048828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 360.5854797363281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 360.6012268066406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 362.4221496582031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 359.7484436035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 360.1862487792969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 360.21746826171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 360.6860656738281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 361.0273132324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 359.6999206542969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 361.1912841796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 360.9327087402344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 361.2171325683594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 359.02313232421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 359.518310546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 362.40814208984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 360.8384094238281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 359.1907653808594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 360.2594909667969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 359.2234191894531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 360.2192077636719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 362.3291931152344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 360.2627258300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 359.7174072265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 360.4114685058594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 359.9306945800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 362.2269592285156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 359.521484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 360.6455993652344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 359.93060302734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 360.74920654296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 359.3440246582031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 360.7913513183594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 361.26300048828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 361.01153564453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 361.2003173828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 360.2448425292969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 360.3896179199219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 359.8029479980469
############ Running episode number: 290  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 361.1882019042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 360.49346923828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 362.7049560546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 360.40118408203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 360.9833068847656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 362.3852233886719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 362.311279296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 359.2647705078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 359.9284973144531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 358.6918640136719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 358.80804443359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 362.5111389160156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 359.63848876953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 360.9395446777344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 361.4471130371094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 363.79156494140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 361.69281005859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 362.2817687988281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 360.9620666503906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 359.7453308105469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 360.5395812988281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 359.0184020996094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 361.239013671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 360.8211975097656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 360.8083190917969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 361.62200927734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 360.246337890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 360.28033447265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 362.1250915527344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 360.661376953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 360.8564147949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 360.49578857421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 359.962158203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 362.3408203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 361.70574951171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 360.4697570800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 360.6639404296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 359.9152526855469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 359.57342529296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 359.05865478515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 359.4705810546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 359.06646728515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 361.6213684082031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 360.53253173828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 361.2486877441406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 358.9102478027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 360.23687744140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 360.3391418457031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 360.3381042480469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 359.3636169433594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 362.02056884765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 359.5002136230469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 360.1036682128906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 361.81658935546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 359.8283386230469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 361.2178039550781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 360.89141845703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 360.72869873046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 360.10760498046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 361.8274230957031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 359.0713806152344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 361.2869567871094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 359.81658935546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 359.9580078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 360.57916259765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 359.6692199707031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 361.67413330078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 359.9573059082031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 361.685546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 360.6722106933594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 360.1752014160156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 360.5047912597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 360.2422790527344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 360.12139892578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 359.04412841796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 359.1931457519531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 361.24639892578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 359.2718811035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 361.9018859863281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 360.69873046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 362.28436279296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 361.7899169921875
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 4 19.0 1189.84140354 (15.446694946204717, 11)
loss 359.8758544921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 360.4969177246094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 359.8785705566406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 359.6666564941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 360.33477783203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 360.3479919433594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 362.0365905761719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 361.8089294433594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 359.6868591308594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 359.00689697265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 362.0373840332031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 359.2902526855469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 362.0033874511719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 359.3644104003906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 359.704833984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 359.6950988769531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 361.0287170410156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 360.0166320800781
############ Running episode number: 291  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 359.8532409667969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 359.8955078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 359.5369873046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 360.6724853515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 360.8387756347656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 359.8131408691406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 360.21270751953125
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 2 19.0 950.798097136 (10.995673623987257, 11)
loss 359.8271789550781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 359.09820556640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 359.3717041015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 359.72845458984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 361.20562744140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 360.8669128417969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 359.7276306152344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 361.30224609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 359.8600158691406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 360.02410888671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 361.0867919921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 360.4245910644531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 359.75067138671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 360.16461181640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 361.17608642578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 360.7924499511719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 359.8012390136719
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 1 20.0 917.051082408 (10.344006106602812, 10)
loss 360.7560729980469
Current State,action,reward,Response time,Next State:  (10, 10.344006106602812) 3 19.0 926.348821567 (10.319026962956018, 11)
loss 361.8808898925781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 359.73260498046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 358.41094970703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 360.80999755859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 360.0206604003906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 360.1055908203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 361.1594543457031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 360.9017639160156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 360.181640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 359.2933654785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 360.8350830078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 359.4759826660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 359.6702880859375
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 0 21.0 914.152581784 (10.390165524255663, 9)
loss 359.9352111816406
Current State,action,reward,Response time,Next State:  (9, 10.390165524255663) 3 20.0 966.991429728 (10.425974763084863, 10)
loss 359.8775939941406
Current State,action,reward,Response time,Next State:  (10, 10.425974763084863) 3 19.0 930.696774523 (10.546025383098053, 11)
loss 360.1668395996094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 360.9639892578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 360.553466796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 360.7709655761719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 359.3977355957031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 360.0976257324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 360.2960510253906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 360.0440368652344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 360.493896484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 360.21929931640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 359.7678527832031
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 2 19.0 1028.70793389 (13.168618569876575, 11)
loss 359.9444885253906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 361.84552001953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 361.4475402832031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 360.7529602050781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 362.3331298828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 360.0769958496094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 360.39068603515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 359.75830078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 360.8088684082031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 361.6314697265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 360.1953430175781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 359.8594055175781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 361.8861389160156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 359.42822265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 361.9642639160156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 363.2684326171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 360.41790771484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 361.3176574707031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 362.25750732421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 359.4536437988281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 359.5447692871094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 358.80999755859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 359.3893737792969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 360.11676025390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 360.9032287597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 359.0631408691406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 361.2296447753906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 360.86224365234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 361.2001037597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 360.8438720703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 361.2779235839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 360.3519592285156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 362.3701171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 361.5805358886719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 360.22857666015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 360.92572021484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 360.358642578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 361.41314697265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 360.3661193847656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 360.9304504394531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 360.7930603027344
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 4 19.0 1219.63501821 (16.147078378791146, 11)
loss 358.705322265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 360.0085754394531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 361.8760681152344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 361.1631774902344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 358.8651428222656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 360.0355224609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 359.6393127441406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 358.9026794433594
############ Running episode number: 292  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 0 22.0 1029.06659875 (11.973514343585284, 8)
loss 361.8383483886719
Current State,action,reward,Response time,Next State:  (8, 11.973514343585284) 3 21.0 1070.83307124 (11.786394321941378, 9)
loss 361.0563659667969
Current State,action,reward,Response time,Next State:  (9, 11.786394321941378) 3 20.0 1040.0771169 (11.61852219546234, 10)
loss 360.8446044921875
Action +2 not possible so Scaled up by 1
Current State,action,reward,Response time,Next State:  (10, 11.61852219546234) 4 19.0 993.95437024 (11.469111876584304, 11)
loss 359.7457275390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 360.3269348144531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 361.32159423828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 360.42510986328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 361.7186279296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 360.44720458984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 361.1020202636719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 361.26385498046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 358.6907958984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 359.8387145996094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 359.2734069824219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 360.0327453613281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 359.7077941894531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 360.6521911621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 361.0094299316406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 362.6440734863281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 359.316162109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 359.8337097167969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 359.0789489746094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 361.1344299316406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 359.2001953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 361.4530029296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 363.30059814453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 360.91827392578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 359.5819091796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 360.72265625
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 2 19.0 910.69972028 (10.335411397720526, 11)
loss 360.2112121582031
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 0 21.0 914.247384359 (10.305649118067803, 9)
loss 362.3339538574219
Current State,action,reward,Response time,Next State:  (9, 10.305649118067803) 3 20.0 962.567412952 (10.24826025489064, 10)
loss 359.2362365722656
Current State,action,reward,Response time,Next State:  (10, 10.24826025489064) 3 19.0 921.2700698 (10.276491935146446, 11)
loss 362.1336364746094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 360.9026794433594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 359.858154296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 363.6016540527344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 359.9860534667969
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 0 21.0 913.272125304 (10.333617326102203, 9)
loss 360.41937255859375
Current State,action,reward,Response time,Next State:  (9, 10.333617326102203) 3 20.0 964.03141062 (10.390165524255663, 10)
loss 360.823974609375
Current State,action,reward,Response time,Next State:  (10, 10.390165524255663) 3 19.0 928.797305964 (10.425974763084863, 11)
loss 359.5660095214844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 358.57684326171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 362.8465576171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 359.8434143066406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 360.7687072753906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 360.4169616699219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 359.9309997558594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 360.7841796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 360.3877258300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 360.4256591796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 359.6881103515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 361.6378479003906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 361.3326721191406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 359.72760009765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 359.318115234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 360.6496887207031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 360.6239013671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 360.3817138671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 359.1745910644531
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 1 20.0 1238.24711194 (16.871606159345866, 10)
loss 361.04425048828125
Current State,action,reward,Response time,Next State:  (10, 16.871606159345866) 3 19.0 1272.5994393 (17.534967586021782, 11)
loss 358.6093444824219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 358.9187927246094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 359.1918029785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 359.478271484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 361.8336181640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 359.6352844238281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 361.7093200683594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 361.6680603027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 359.6561279296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 360.0572204589844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 359.34649658203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 359.91754150390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 359.13800048828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 361.2186584472656
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 1 20.0 1376.52055872 (18.668181536495972, 10)
loss 359.3508605957031
Current State,action,reward,Response time,Next State:  (10, 18.668181536495972) 3 19.0 1367.89714889 (18.375894992990247, 11)
loss 360.0576171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 361.1701354980469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 359.1758117675781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 360.7508850097656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 360.62664794921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 361.13232421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 361.418212890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 359.62774658203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 358.3888244628906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 359.2797546386719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 359.9289855957031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 359.3638000488281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 359.6153259277344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 359.4219665527344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 360.7522277832031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 360.77642822265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 365.3127746582031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 359.6119079589844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 359.15216064453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 360.4067077636719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 360.43743896484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 361.03955078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 361.9393005371094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 359.8670349121094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 359.26751708984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 359.6036071777344
############ Running episode number: 293  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 360.3750305175781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 362.6496276855469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 360.1861267089844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 362.5895690917969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 361.23626708984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 360.9833984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 361.614501953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 362.5567321777344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 360.73626708984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 360.9289855957031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 359.4170227050781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 362.9059143066406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 361.6016845703125
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 2 19.0 937.318160694 (10.644925616761762, 11)
loss 361.55548095703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 360.4441223144531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 362.2613220214844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 359.7580261230469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 360.0705871582031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 361.9666748046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 363.0144958496094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 360.5383605957031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 359.8074951171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 360.97314453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 360.6316833496094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 359.7855224609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 360.82623291015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 359.5517578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 360.3318176269531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 361.2975158691406
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 4 19.0 910.69972028 (10.335411397720526, 11)
loss 358.7221374511719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 361.0782470703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 359.3437805175781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 360.076416015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 360.064453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 359.8763732910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 360.8933410644531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 359.482421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 361.2496032714844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 358.7681579589844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 361.67889404296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 361.99462890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 361.6759338378906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 359.9208679199219
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 1 20.0 929.522052234 (10.771376986314287, 10)
loss 361.6933898925781
Current State,action,reward,Response time,Next State:  (10, 10.771376986314287) 3 19.0 949.018321829 (10.924797168745895, 11)
loss 361.0791320800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 362.2436218261719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 359.71685791015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 358.9974670410156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 359.3216247558594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 360.3411560058594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 359.4053649902344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 360.066650390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 360.434326171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 360.3580627441406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 360.56915283203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 360.18505859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 360.34906005859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 359.9340515136719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 359.2381591796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 359.74554443359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 360.1782531738281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 360.0019836425781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 360.1296691894531
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 0 21.0 1339.64749699 (18.671267839956315, 9)
loss 362.8466491699219
Current State,action,reward,Response time,Next State:  (9, 18.671267839956315) 3 20.0 1400.46626871 (19.02839494033929, 10)
loss 362.3121032714844
Current State,action,reward,Response time,Next State:  (10, 19.02839494033929) 3 19.0 1387.00434183 (19.286321916040979, 11)
loss 361.27532958984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 359.1398620605469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 360.40704345703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 359.0140075683594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 358.67034912109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 359.6205749511719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 360.0960998535156
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 1 20.0 1385.62570908 (19.08360399753829, 10)
loss 360.6567687988281
Current State,action,reward,Response time,Next State:  (10, 19.08360399753829) 3 19.0 1389.93285614 (18.668181536495972, 11)
loss 360.94049072265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 359.8909912109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 361.0187072753906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 358.76397705078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 360.26531982421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 360.29888916015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 359.9405517578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 359.4339904785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 359.6844787597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 359.0298767089844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 359.57623291015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 361.21441650390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 359.8453674316406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 360.4433288574219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 360.60455322265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 359.04949951171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 359.39703369140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 359.0609436035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 359.5614013671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 360.39984130859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 359.9584655761719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 362.5301208496094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 361.38507080078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 361.6876220703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 360.6045227050781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 360.3763732910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 361.14483642578125
############ Running episode number: 294  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 361.78521728515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 360.0742492675781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 360.16473388671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 361.020263671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 362.1419982910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 360.34649658203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 359.7227783203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 360.2811584472656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 362.7797546386719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 360.2312927246094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 361.183837890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 360.4110412597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 360.1401672363281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 361.3814697265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 359.78619384765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 359.23529052734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 359.4053649902344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 360.96148681640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 359.7353210449219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 360.36370849609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 360.3189392089844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 361.2699890136719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 359.1092834472656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 359.048583984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 360.86895751953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 359.204833984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 359.21087646484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 359.612060546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 360.6974792480469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 360.48822021484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 359.3724670410156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 359.8922119140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 359.88507080078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 360.7503356933594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 362.9253845214844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 359.6650390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 359.5992736816406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 360.3146667480469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 360.5676574707031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 359.8492736816406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 361.3252258300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 362.0248107910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 360.8155212402344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 359.18817138671875
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 4 19.0 937.284736847 (10.924797168745895, 11)
loss 360.77496337890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 358.9781188964844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 359.5459899902344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 360.8500671386719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 361.7075500488281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 362.54132080078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 360.2131042480469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 361.50732421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 360.0899658203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 360.3327941894531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 360.552978515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 361.2947082519531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 360.1563415527344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 360.0439758300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 361.89373779296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 359.9629211425781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 360.0985412597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 361.7639465332031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 361.6536560058594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 360.9427490234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 361.5347595214844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 359.590087890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 360.840087890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 361.53961181640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 361.50506591796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 360.627197265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 359.6218566894531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 358.8984680175781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 359.3330383300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 359.2803955078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 359.9122314453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 359.39532470703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 360.495361328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 359.7867431640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 359.9724426269531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 360.41644287109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 360.4150085449219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 360.3879699707031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 358.99169921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 360.5970458984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 361.2012634277344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 359.26629638671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 360.5191650390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 360.33099365234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 360.90240478515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 360.419677734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 359.636474609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 360.4928283691406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 359.78692626953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 359.8861083984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 359.14105224609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 360.7572326660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 359.516845703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 361.6829833984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 360.7500305175781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 360.7593078613281
############ Running episode number: 295  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 359.7231750488281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 359.8000183105469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 359.2179870605469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 359.90386962890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 359.06146240234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 360.4436950683594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 361.8122863769531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 360.6717224121094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 359.1419982910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 360.17669677734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 360.7591247558594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 361.0753173828125
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 0 21.0 937.12351295 (10.772009508959538, 9)
loss 360.4839782714844
Current State,action,reward,Response time,Next State:  (9, 10.772009508959538) 3 20.0 986.979077927 (10.644925616761762, 10)
loss 361.3541259765625
Current State,action,reward,Response time,Next State:  (10, 10.644925616761762) 3 19.0 942.310823749 (10.58735855349979, 11)
loss 360.5768127441406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 360.39923095703125
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 4 19.0 925.738299342 (10.553846649940214, 11)
loss 359.12939453125
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 2 19.0 925.789969445 (10.489125480251131, 11)
loss 360.3471984863281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 361.1163635253906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 360.4205627441406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 362.02081298828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 359.044921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 359.0760192871094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 361.89202880859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 360.50634765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 359.0882568359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 360.3775634765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 360.07916259765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 359.4275817871094
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 4 19.0 910.69972028 (10.335411397720526, 11)
loss 361.2025146484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 360.1664733886719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 360.8113708496094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 359.9931640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 359.9772033691406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 360.2967529296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 359.74566650390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 360.251953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 360.15716552734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 360.04974365234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 360.6625671386719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 362.9300537109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 360.3809509277344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 360.0707702636719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 359.603515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 360.4383850097656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 361.5359802246094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 361.2470397949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 360.21441650390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 360.1812744140625
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 4 19.0 992.681522335 (12.19918626616789, 11)
loss 359.65313720703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 359.9000549316406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 361.4142761230469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 360.7913513183594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 360.47418212890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 360.8752136230469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 359.0565490722656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 359.2802429199219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 359.1692199707031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 361.8630676269531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 360.2052307128906
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 4 19.0 1294.6873044 (17.669285735563751, 11)
loss 359.17474365234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 360.56024169921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 362.28662109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 360.17449951171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 360.7198486328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 360.9657287597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 359.62066650390625
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 0 21.0 1390.09363446 (19.213467265587269, 9)
loss 362.48590087890625
Current State,action,reward,Response time,Next State:  (9, 19.213467265587269) 3 20.0 1428.84773289 (19.140765783401285, 10)
loss 361.28814697265625
Current State,action,reward,Response time,Next State:  (10, 19.140765783401285) 3 19.0 1392.96495117 (19.385636054792762, 11)
loss 360.5643615722656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 360.95086669921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 362.6556701660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 363.0194396972656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 359.82110595703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 362.3575439453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 361.8330078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 359.3639221191406
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 0 21.0 1278.56065924 (16.84211602880065, 9)
loss 360.17315673828125
Current State,action,reward,Response time,Next State:  (9, 16.84211602880065) 3 20.0 1304.71919827 (16.237094554670044, 10)
loss 359.2149658203125
Current State,action,reward,Response time,Next State:  (10, 16.237094554670044) 3 19.0 1238.94234737 (15.950694610794756, 11)
loss 359.211181640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 359.92352294921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 362.1759338378906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 359.9724426269531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 360.9478759765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 362.2421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 360.6045227050781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 360.2057189941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 359.8796691894531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 359.55902099609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 360.5995178222656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 359.9414978027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 359.5662536621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 359.09649658203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 360.4433288574219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 360.1434631347656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 359.97808837890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 359.4644775390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 361.9107360839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 361.2095642089844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 360.62872314453125
############ Running episode number: 296  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 361.4773864746094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 359.8047790527344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 359.5145568847656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 360.0865783691406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 361.3058166503906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 359.91168212890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 360.2860107421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 360.81927490234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 360.7534484863281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 359.7936096191406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 359.8941345214844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 358.92852783203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 359.7261962890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 360.27734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 359.53350830078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 359.64495849609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 362.5940246582031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 360.6280212402344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 361.4942321777344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 359.7940368652344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 361.0829162597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 362.5012512207031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 361.0227355957031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 359.7225646972656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 359.1611328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 359.6195373535156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 360.7545166015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 361.0455322265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 358.8591003417969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 359.081787109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 359.58197021484375
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 1 20.0 912.67468196 (10.24826025489064, 10)
loss 360.7973937988281
Current State,action,reward,Response time,Next State:  (10, 10.24826025489064) 3 19.0 921.2700698 (10.276491935146446, 11)
loss 360.61309814453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 359.2801208496094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 360.6180725097656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 359.96710205078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 360.083740234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 360.1678466796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 360.72821044921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 360.93341064453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 360.82135009765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 360.84136962890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 359.08978271484375
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 2 19.0 929.522052234 (10.771376986314287, 11)
loss 358.91693115234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 361.7767639160156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 360.2190856933594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 361.6523742675781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 361.59417724609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 360.037353515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 360.3789978027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 363.5659484863281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 359.8016662597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 359.93817138671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 362.24560546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 360.4608459472656
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 4 19.0 1143.69153516 (15.353965082180355, 11)
loss 360.8096618652344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 360.5245361328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 360.7708740234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 361.5546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 359.22705078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 359.5766296386719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 359.4712219238281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 359.335205078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 359.57244873046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 359.4886169433594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 361.4970397949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 359.1139221191406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 360.3962097167969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 360.5200500488281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 359.9528503417969
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 2 19.0 1392.48057747 (19.223969507401588, 11)
loss 362.20513916015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 360.4636535644531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 361.4521484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 359.5252380371094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 360.4500427246094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 360.4848937988281
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 1 20.0 1310.13203606 (17.229782241685768, 10)
loss 359.6547546386719
Current State,action,reward,Response time,Next State:  (10, 17.229782241685768) 3 19.0 1291.59856437 (16.84211602880065, 11)
loss 361.86041259765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 360.1095275878906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 360.01092529296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 360.0618591308594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 363.8683166503906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 359.69390869140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 359.1796569824219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 361.3528137207031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 361.03155517578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 362.05535888671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 361.2853698730469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 359.4692687988281
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 2 19.0 1213.81868812 (16.017694914042416, 11)
loss 361.20892333984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 360.5450744628906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 359.6448974609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 360.5138854980469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 360.6510925292969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 361.3062438964844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 358.9479675292969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 360.2984924316406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 359.15185546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 360.6004638671875
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 2 19.0 1269.21706044 (17.215992726625572, 11)
loss 360.37640380859375
############ Running episode number: 297  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 359.9705505371094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 361.094482421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 361.7146911621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 360.9525451660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 360.81378173828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 359.25677490234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 363.2485656738281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 359.24591064453125
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 1 20.0 949.137050055 (10.931193889570471, 10)
loss 359.9716796875
Current State,action,reward,Response time,Next State:  (10, 10.931193889570471) 3 19.0 957.495664348 (10.816918347608043, 11)
loss 359.7464599609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 359.8741760253906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 360.4019775390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 361.1456604003906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 361.20416259765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 361.1255798339844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 359.8520202636719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 359.2093200683594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 360.97705078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 359.71270751953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 360.3721923828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 361.4296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 360.7619323730469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 359.6733093261719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 360.29827880859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 360.4619445800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 361.7771911621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 362.0802307128906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 360.9222412109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 359.2222595214844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 360.3984069824219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 360.8868103027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 361.48419189453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 360.0854797363281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 362.1123046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 359.7536926269531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 360.2619323730469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 360.8235778808594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 359.86187744140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 359.2821044921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 359.4032287597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 360.6551208496094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 359.7967834472656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 358.77655029296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 360.2110900878906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 358.8739929199219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 361.2294921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 361.03863525390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 359.7002258300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 362.1489562988281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 361.05645751953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 359.3039245605469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 359.7922058105469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 359.6256408691406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 360.8764953613281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 359.064453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 360.7470397949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 362.0587463378906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 361.9910888671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 362.49920654296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 359.5850524902344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 360.4788513183594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 359.9039611816406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 362.8781433105469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 361.2958068847656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 362.0357360839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 362.37542724609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 360.8205871582031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 360.4355163574219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 360.120361328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 359.3695068359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 360.5423278808594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 361.1263732910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 359.9088134765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 360.9090576171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 360.5799865722656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 362.0937805175781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 360.427734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 360.16851806640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 360.14093017578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 361.87432861328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 361.3621520996094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 359.580078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 359.8651123046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 359.5201721191406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 359.3427734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 361.8184814453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 360.7535095214844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 360.5530090332031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 359.48724365234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 359.5615234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 360.0793762207031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 360.3399658203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 359.94818115234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 362.4305419921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 360.0939025878906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 360.01123046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 360.6780090332031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 359.666748046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 360.783935546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 361.1166687011719
############ Running episode number: 298  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 360.9722900390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 362.3954772949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 363.15570068359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 361.1924133300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 361.53887939453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 360.8649597167969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 361.1620178222656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 358.948974609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 359.061279296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 361.66778564453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 361.3027648925781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 359.6614685058594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 360.0962829589844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 362.6431884765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 360.4688415527344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 360.74005126953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 360.33282470703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 361.534423828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 364.1735534667969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 360.25823974609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 360.8655700683594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 360.1157531738281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 362.23858642578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 359.7509765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 361.44598388671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 360.6222229003906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 360.4383239746094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 360.94195556640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 360.43505859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 360.2525329589844
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 4 19.0 914.247384359 (10.305649118067803, 11)
loss 359.86395263671875
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 2 19.0 912.67468196 (10.24826025489064, 11)
loss 362.1202087402344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 359.79986572265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 361.24688720703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 360.4681701660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 360.87799072265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 359.8863220214844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 360.1284484863281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 360.0877685546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 359.68603515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 361.4029541015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 359.7340087890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 361.3713073730469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 360.4076232910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 359.6297912597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 360.830078125
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 4 19.0 951.466016946 (11.271571944085663, 11)
loss 361.0509948730469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 358.74169921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 360.849365234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 360.16448974609375
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 4 19.0 1012.73322757 (12.501496275411796, 11)
loss 359.6694030761719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 359.1080322265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 360.5206604003906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 360.3053283691406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 360.21600341796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 359.5393371582031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 361.4681091308594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 360.4813232421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 359.4581298828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 361.6176452636719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 359.3087158203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 361.45989990234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 360.27801513671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 359.68896484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 359.6720275878906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 360.94329833984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 360.6761474609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 360.738525390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 359.6834716796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 359.4068603515625
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 4 19.0 1392.48057747 (19.223969507401588, 11)
loss 360.1772766113281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 359.14239501953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 358.7137145996094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 358.870361328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 362.14111328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 359.9842834472656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 359.2811279296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 360.07574462890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 359.175048828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 361.5726623535156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 359.4949035644531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 359.8878479003906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 359.1761779785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 361.013427734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 362.6205139160156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 359.9555358886719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 359.85430908203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 359.3642578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.9129333496094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 359.1395263671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 359.19012451171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 360.4057922363281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 359.8544006347656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 360.3982849121094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 359.5355529785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 359.18365478515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 359.699462890625
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 1 20.0 1257.77263112 (16.845818065953559, 10)
loss 359.9853210449219
Current State,action,reward,Response time,Next State:  (10, 16.845818065953559) 3 19.0 1271.23153331 (17.052961248403161, 11)
loss 360.43499755859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 361.3841857910156
############ Running episode number: 299  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 359.79156494140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 359.9266052246094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 359.4234924316406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 358.9638366699219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 360.4383239746094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 360.6992492675781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 360.20654296875
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 1 20.0 950.798097136 (10.995673623987257, 10)
loss 359.905029296875
Current State,action,reward,Response time,Next State:  (10, 10.995673623987257) 3 19.0 960.915933313 (10.931193889570471, 11)
loss 361.18048095703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 359.6413269042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 361.27020263671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 360.201416015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 360.78619384765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 360.4022216796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 359.9852294921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 360.000732421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 361.44146728515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 360.57220458984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 361.6440734863281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 360.9450988769531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 359.6538391113281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 359.5127868652344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 359.2426452636719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 360.92572021484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 360.04595947265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 360.22174072265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 361.107177734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 359.0289001464844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 360.420166015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 358.80731201171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 359.26495361328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 360.0790710449219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 360.752685546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 359.64862060546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 359.742919921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 360.23870849609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 362.4005432128906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 360.9908142089844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 361.5347900390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 361.43487548828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 361.5671691894531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 360.9609680175781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 361.2712097167969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 361.47589111328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 360.6082458496094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 359.61083984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 360.3880920410156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 360.2933349609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 360.85211181640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 361.4146423339844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 360.9146423339844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 360.327880859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 359.3896484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 359.24652099609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 362.2220458984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 360.2014465332031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 361.5113830566406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 359.112548828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 360.1908874511719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 362.83306884765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 360.2257080078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 358.720458984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 360.4925842285156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 360.3815002441406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 359.7025451660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 359.8266296386719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 359.16845703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 359.41290283203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 360.1514587402344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 360.64324951171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 360.7359313964844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 360.2915344238281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 360.9196472167969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 360.87554931640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 360.33221435546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 361.1623840332031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 359.34747314453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 361.0398864746094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 361.2401123046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 359.903076171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 359.5987243652344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 359.3916015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 359.3820495605469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 358.8917236328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 359.5420227050781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 361.7677917480469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 361.4877014160156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 361.0836486816406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 360.2091369628906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 359.6855773925781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 360.36932373046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 359.5433349609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 362.23822021484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 359.9835510253906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 359.0158386230469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 359.45611572265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 359.7432556152344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 359.47674560546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 359.51922607421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 362.3642578125
############ Running episode number: 300  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 360.9794921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 360.22308349609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 360.6785888671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 361.2837829589844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 359.3116760253906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 360.42633056640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 360.1837463378906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 359.60150146484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 361.2945861816406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 363.3818054199219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 361.9876403808594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 359.34283447265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 360.4878845214844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 360.4070129394531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 361.2806396484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 359.9045104980469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 360.7801513671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 360.26116943359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 361.1063232421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 360.5553283691406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 359.54833984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 363.5382995605469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 360.8925476074219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 360.7014465332031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 361.8661193847656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 360.55255126953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 360.4791259765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 359.9059753417969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 361.9209899902344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 360.8133239746094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 359.64984130859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 361.80389404296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 359.15716552734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 360.649658203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 359.82501220703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 360.02508544921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 359.33685302734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.51239013671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 359.3950500488281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 359.45068359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 359.7480163574219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 359.8235778808594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 361.3798522949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 359.54791259765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 359.5443420410156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 361.962158203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 359.62542724609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 360.17919921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 359.3222961425781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 359.8720397949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 359.6781921386719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 359.80291748046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 359.74249267578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 359.9770812988281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 360.3997497558594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 360.0341796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 358.84747314453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 359.171142578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 361.18255615234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 359.74884033203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 360.7511901855469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 361.3752136230469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 359.56915283203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 360.2016906738281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 359.77972412109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 359.31292724609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 360.1698303222656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 360.09698486328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 359.4842834472656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 359.62103271484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 362.1929626464844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 360.5242004394531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 360.46502685546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 360.85125732421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 359.33209228515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 360.814697265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 361.3901672363281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 358.4182434082031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.8681335449219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 359.0721130371094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 360.1046447753906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 361.5328674316406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 359.78173828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 361.0381774902344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 360.84710693359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 360.9409484863281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 361.5342102050781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 359.87445068359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 359.84344482421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 359.4765930175781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 359.0489807128906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 361.21563720703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 359.39202880859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 360.0902099609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 359.7299499511719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 360.4783020019531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 360.1761779785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 360.50396728515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 359.9871520996094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 360.1433410644531
############ Running episode number: 301  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 359.6263427734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 359.05535888671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 360.1867980957031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 359.533203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 360.0760192871094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 360.6393737792969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 361.2801513671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 360.52099609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 359.8965759277344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 359.9615783691406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 360.0757141113281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 359.35992431640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 361.4700622558594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 359.54638671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 361.6686706542969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 360.19024658203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 359.2660217285156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 360.5686340332031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 359.94036865234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 359.03155517578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 359.9787902832031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 359.29644775390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 359.5582275390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 360.4217529296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 360.9919128417969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 359.6978454589844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 360.91339111328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 360.8119201660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 360.2091979980469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 358.8746032714844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 360.4424743652344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 360.44879150390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 359.5513610839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 359.173828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 359.9581604003906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 361.5462951660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 359.4564514160156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.8671569824219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 360.7590637207031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 361.15106201171875
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 0 21.0 919.032945938 (10.546025383098053, 9)
loss 361.37310791015625
Current State,action,reward,Response time,Next State:  (9, 10.546025383098053) 3 20.0 975.14992417 (10.655373370049301, 10)
loss 358.75830078125
Current State,action,reward,Response time,Next State:  (10, 10.655373370049301) 3 19.0 942.865015335 (10.624473674922116, 11)
loss 359.1416320800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 359.9142150878906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 360.2256164550781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 362.275146484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 361.0260009765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 359.1143493652344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 359.1717224121094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 360.0655822753906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 359.3129577636719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 359.2785949707031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 360.9557800292969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 361.3403625488281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 359.6866149902344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 360.2505798339844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 360.768310546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 361.0896301269531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 361.33709716796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 360.8643493652344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 359.7020568847656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 358.853759765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 360.7364807128906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 362.15924072265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 361.8677978515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 359.74114990234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 361.42724609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 360.1724548339844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 359.7942810058594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 360.1466064453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 360.35540771484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 360.404296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 360.3351135253906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 359.7320251464844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 359.8812255859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 360.6654357910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 361.34173583984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 358.872802734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 361.1553039550781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 359.12982177734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 359.1093444824219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 359.9055480957031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 358.8542785644531
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 1 20.0 1184.33851965 (15.750501603468638, 10)
loss 361.2854919433594
Current State,action,reward,Response time,Next State:  (10, 15.750501603468638) 3 19.0 1213.1314661 (15.817158911312735, 11)
loss 359.7607727050781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 360.0342102050781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 359.8287353515625
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 0 21.0 1207.88915169 (15.954793861767499, 9)
loss 358.9535217285156
Current State,action,reward,Response time,Next State:  (9, 15.954793861767499) 3 20.0 1258.27226176 (16.004586266677634, 10)
loss 362.1362609863281
Current State,action,reward,Response time,Next State:  (10, 16.004586266677634) 3 19.0 1226.60915635 (16.017694914042416, 11)
loss 360.20806884765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 359.5348815917969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 360.1869812011719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 359.29827880859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 359.9927062988281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 362.4355773925781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 360.3971252441406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 359.19232177734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 360.6368408203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 361.1429748535156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 359.9036560058594
############ Running episode number: 302  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 360.3005065917969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 360.1865539550781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 360.29339599609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 360.4853820800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 359.9437561035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 361.0465393066406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 360.02984619140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 360.06805419921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 358.9775390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 359.0438537597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 359.38934326171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 360.152099609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 360.8304748535156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 360.4790344238281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 359.0250244140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 359.4281311035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 359.56201171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 360.8414306640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 359.80621337890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 358.79168701171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 359.5453186035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 360.49072265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 358.90057373046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 360.1551513671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 360.11285400390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 360.2369689941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 360.8887023925781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 361.24871826171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 360.3959045410156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 359.4886474609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 360.9477844238281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 361.8486022949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 359.5256652832031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 360.27777099609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 360.9819030761719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 359.7425537109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 358.68701171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 361.424560546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 359.7527770996094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 359.5641784667969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 359.56793212890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 360.05584716796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 359.6579284667969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 361.3658752441406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 361.0829772949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 361.46099853515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 359.7505187988281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 359.9844970703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 358.854248046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 359.6949768066406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 359.2195129394531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 359.954345703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 359.81427001953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 360.376708984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 359.12042236328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 359.57611083984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 359.4491882324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 359.3724060058594
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 0 21.0 1238.24711194 (16.871606159345866, 9)
loss 359.5730895996094
Current State,action,reward,Response time,Next State:  (9, 16.871606159345866) 3 20.0 1306.26286107 (17.534967586021782, 10)
loss 359.7915954589844
Current State,action,reward,Response time,Next State:  (10, 17.534967586021782) 3 19.0 1307.78684385 (17.669285735563751, 11)
loss 358.75360107421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 360.57586669921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 360.21722412109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 361.0238342285156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 360.4414367675781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 362.0682678222656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 360.5868225097656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 361.0481262207031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 358.9048156738281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 359.2008972167969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 359.9141540527344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 360.12213134765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 358.733642578125
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 4 19.0 1376.52055872 (18.668181536495972, 11)
loss 359.3817138671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 359.1478271484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 359.040283203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 360.1624755859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 359.7864990234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 359.119140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 360.7237548828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 361.67730712890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 359.3114929199219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 360.6123962402344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 360.0175476074219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 360.699462890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 359.4825744628906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 359.85382080078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 360.05792236328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 360.3797912597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 359.6510009765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 359.07159423828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 359.7259216308594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 360.1865234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 360.1178283691406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 358.67919921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 360.31866455078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 360.825439453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 360.2923889160156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 359.1697692871094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 358.7499694824219
############ Running episode number: 303  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 359.04559326171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 359.4075927734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 361.69989013671875
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 2 19.0 982.049698353 (11.469111876584304, 11)
loss 360.0635681152344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 359.5379638671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 361.4349670410156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 359.2420654296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 361.35992431640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 361.1578063964844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 360.09637451171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 359.00225830078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 360.2192077636719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 360.75677490234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 359.3059387207031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 359.1624755859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 359.25732421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 358.922607421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 359.4021301269531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 359.1028747558594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 359.46807861328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 359.98797607421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 360.2891540527344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 359.6436462402344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 359.4788513183594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 359.0955505371094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 360.8070983886719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 358.9748840332031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 360.7666931152344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 358.8396911621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 362.25830078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 359.5949401855469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 359.2019958496094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 360.82135009765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 361.03033447265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 359.46014404296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 359.5399475097656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 360.29705810546875
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 4 19.0 913.272125304 (10.333617326102203, 11)
loss 360.2091979980469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 360.2202453613281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 360.1441955566406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 359.736083984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 360.8033447265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 362.0224304199219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 358.8673095703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 360.2811584472656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 359.312744140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 359.8550109863281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 359.5723571777344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 360.6268615722656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 359.51141357421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 359.9167175292969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 360.0849304199219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 359.0808410644531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 359.1903076171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 361.2666015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 359.7169189453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 358.9825134277344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 359.1855773925781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 359.5996398925781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 360.2571716308594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 359.2953796386719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 362.16607666015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 359.2585754394531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 360.3821105957031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 359.70965576171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 358.93292236328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 359.2309265136719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 358.9453430175781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 360.77142333984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 359.7491760253906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 358.9870910644531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 359.6648254394531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 360.3218994140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 359.63812255859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 359.3214416503906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 360.917724609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 359.0557861328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 359.22491455078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 359.2370910644531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 359.9728698730469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 360.26544189453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 359.73382568359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 361.3246765136719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 360.93939208984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 360.4268493652344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 360.46044921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 360.6173095703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 359.0494384765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 359.3380432128906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 359.7030334472656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 359.9321594238281
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 4 19.0 1210.80462626 (16.11465619633363, 11)
loss 360.9664611816406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 358.6587829589844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 358.77178955078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 359.4671325683594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 361.13238525390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 359.7144470214844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 360.3430480957031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 359.8226318359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 359.572509765625
############ Running episode number: 304  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.9122009277344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 359.981201171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 359.57232666015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 358.7934875488281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 361.1513366699219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 359.424560546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 360.27069091796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 360.6410217285156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 360.3951416015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 361.1289367675781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 360.4136962890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 359.28326416015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 360.4482116699219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 360.07073974609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 359.4226989746094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 359.935791015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 361.7950744628906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 360.27667236328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 359.3408203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 359.68218994140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 360.486572265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 360.3977966308594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 360.17718505859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 359.4767150878906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 359.09783935546875
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 0 21.0 914.701547126 (10.319026962956018, 9)
loss 359.7044372558594
Current State,action,reward,Response time,Next State:  (9, 10.319026962956018) 3 20.0 963.267677113 (10.30224719189987, 10)
loss 359.86602783203125
Current State,action,reward,Response time,Next State:  (10, 10.30224719189987) 3 19.0 924.133757854 (10.278181486298042, 11)
loss 359.6814270019531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 361.25506591796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 360.7395324707031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 359.4030456542969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 359.0234069824219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 359.5967712402344
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 0 21.0 911.133954163 (10.236991269871366, 9)
loss 358.8111267089844
Current State,action,reward,Response time,Next State:  (9, 10.236991269871366) 3 20.0 958.973513426 (10.236272697871373, 10)
loss 359.8746337890625
Current State,action,reward,Response time,Next State:  (10, 10.236272697871373) 3 19.0 920.634200723 (10.369891240151098, 11)
loss 359.14068603515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 359.9853820800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 360.0029602050781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 359.30035400390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 359.3448791503906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 360.3612976074219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 359.9119873046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 359.9758605957031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 360.1513366699219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 358.8581848144531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 360.7843322753906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 359.7054748535156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 360.6783447265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 359.181640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 359.06884765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 360.5440673828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 361.0910339355469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 358.80859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 360.7579040527344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 359.5711975097656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 363.0149230957031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 359.805908203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 358.7251892089844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 359.2120666503906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 360.7980651855469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 359.9744873046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 360.18707275390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 359.99212646484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 360.3595275878906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 358.6252746582031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 359.4981689453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 359.77130126953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 361.1477966308594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 360.4414978027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 359.00567626953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 359.6324768066406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 360.0892028808594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 359.22662353515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 359.33087158203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 359.8304443359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 362.1867980957031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 359.8916931152344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 361.59002685546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 359.3488464355469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 360.1016540527344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 360.2561950683594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 360.339599609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 359.5231628417969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 359.55145263671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 360.7552795410156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 362.1617736816406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 360.6177062988281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 359.5672302246094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.832763671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 359.6982116699219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 359.73272705078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 360.8705749511719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 360.7735595703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 360.5468444824219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 361.5726623535156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 361.1390380859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 360.7583312988281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 359.64501953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 359.1764831542969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 359.5994873046875
############ Running episode number: 305  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 360.1136474609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 359.7459411621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 359.211669921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 359.5716247558594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 361.3134765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 360.56097412109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 361.06524658203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 358.73590087890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 359.041015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 358.7752990722656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 359.0305480957031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 360.118408203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 360.92169189453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 359.33575439453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 361.0531921386719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 359.9643249511719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 360.53076171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 360.0738830566406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 359.86676025390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 360.5628967285156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 360.2752685546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 360.583984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 359.4143371582031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 359.27593994140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 360.7980041503906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 360.4571533203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 359.2698059082031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 358.97900390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 358.7562255859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 359.81024169921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 359.5829772949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 360.2453918457031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 360.5611877441406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 359.1532897949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 358.8797607421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 359.9879150390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 361.2204284667969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 360.2018737792969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 359.8791198730469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 360.96136474609375
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 0 21.0 919.032945938 (10.546025383098053, 9)
loss 359.8153076171875
Current State,action,reward,Response time,Next State:  (9, 10.546025383098053) 3 20.0 975.14992417 (10.655373370049301, 10)
loss 359.7000427246094
Current State,action,reward,Response time,Next State:  (10, 10.655373370049301) 3 19.0 942.865015335 (10.624473674922116, 11)
loss 360.1826477050781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 359.741943359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 359.82427978515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 361.1506652832031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 360.5306396484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 360.90069580078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 359.43865966796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 361.72259521484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 359.2039489746094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 360.18829345703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 359.0031433105469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 360.813720703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 360.0342102050781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 360.78717041015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 359.75469970703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 362.0674743652344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 360.35162353515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 360.0621643066406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 360.5523986816406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 361.1194152832031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 359.94073486328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 360.1383056640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 359.1614074707031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 361.2682189941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 360.0971374511719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 359.09619140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 360.08587646484375
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 2 19.0 1379.54110953 (19.385636054792762, 11)
loss 359.3475341796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 361.88311767578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 359.831298828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 360.34442138671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 358.7855224609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 359.7657165527344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 359.7666015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 360.0522155761719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 360.7977294921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 361.43157958984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 359.86883544921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 360.4437561035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 361.5895080566406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 359.0579528808594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 361.45819091796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 361.2120056152344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 359.49847412109375
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 1 20.0 1204.59090422 (15.892373986997768, 10)
loss 359.2208557128906
Current State,action,reward,Response time,Next State:  (10, 15.892373986997768) 3 19.0 1220.65695786 (15.954793861767499, 11)
loss 360.0416564941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 360.0612487792969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 360.104736328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 360.2992248535156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 359.4269104003906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 358.75445556640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 360.0423889160156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 359.649169921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 361.1339416503906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 359.0225830078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 361.9719543457031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 359.5677490234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 360.31634521484375
############ Running episode number: 306  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.872802734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 360.07177734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 358.8443908691406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 360.02423095703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 360.39190673828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 360.17633056640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 359.9724426269531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 360.0621032714844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 361.1660461425781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 360.5324401855469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 359.04119873046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 359.2563781738281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 360.14447021484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 360.6106262207031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 359.3279724121094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 359.3011169433594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 360.69830322265625
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 1 20.0 925.789969445 (10.489125480251131, 10)
loss 360.140869140625
Current State,action,reward,Response time,Next State:  (10, 10.489125480251131) 3 19.0 934.046546974 (10.448897752470936, 11)
loss 359.7485656738281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 358.7203063964844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 359.1225280761719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 359.87860107421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 359.2928771972656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 361.0138854980469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 358.8664245605469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 360.01287841796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 359.40045166015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 360.2442932128906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 362.3801574707031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 359.6011962890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 359.1328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 359.2070617675781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 359.3081970214844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 360.3533630371094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 359.4701232910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 358.8354187011719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 359.3849182128906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 359.2261962890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 360.1961669921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 360.3985290527344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 359.6081237792969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 360.7965393066406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 360.60418701171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 359.80108642578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 360.7192687988281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 360.57647705078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 359.6810607910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 358.8562927246094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 360.9735412597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 359.276123046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 359.9305114746094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 361.1792297363281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 359.66387939453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 361.07318115234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 359.75677490234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 360.4849548339844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 358.66473388671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 359.6706237792969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 359.2032165527344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 360.91046142578125
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 1 20.0 1294.6873044 (17.669285735563751, 10)
loss 359.76519775390625
Current State,action,reward,Response time,Next State:  (10, 17.669285735563751) 3 19.0 1314.91162813 (17.944480812078613, 11)
loss 359.96929931640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 358.6918029785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 359.7812805175781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 359.0946044921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 360.1534423828125
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 1 20.0 1387.23260634 (19.340464848017284, 10)
loss 359.2076416015625
Current State,action,reward,Response time,Next State:  (10, 19.340464848017284) 3 19.0 1403.55780672 (19.213467265587269, 11)
loss 359.61456298828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 359.8121643066406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 359.5899353027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 360.9549560546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 359.1956481933594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 359.50836181640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 359.9934997558594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 360.2459716796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 359.6620788574219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 360.85858154296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 359.9580078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 359.6376037597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 360.8113098144531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 359.341796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 359.5532531738281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 359.1579284667969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 360.2893371582031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 360.33990478515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 359.5682067871094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 360.8890075683594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.67669677734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 359.2546081542969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 360.3204040527344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 359.97003173828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 360.5388488769531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 359.069091796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 359.9181213378906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 358.93756103515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 358.7348937988281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 359.763671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 359.7121887207031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 358.9369201660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 360.3872985839844
############ Running episode number: 307  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.82525634765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 360.8630676269531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 359.58599853515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 359.9088439941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 358.6567077636719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 360.1836853027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 360.18988037109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 358.94036865234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 360.4281311035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 360.90472412109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 358.93304443359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 359.03839111328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 360.5266418457031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 359.8836669921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 359.5802001953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 359.74774169921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 359.0709228515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 360.55999755859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 360.2362365722656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 361.0533752441406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 358.8435363769531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 360.0114440917969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 359.8731384277344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 358.6697998046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 360.2836608886719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 359.1256103515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 359.01708984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 359.4012145996094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 359.2103271484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 359.2217712402344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 359.3631286621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 359.95819091796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 359.7613220214844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 358.8619079589844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 359.5106506347656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 358.828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 360.67608642578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.90313720703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 360.1285705566406
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 4 19.0 917.140709305 (10.425974763084863, 11)
loss 360.3866271972656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 359.49951171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 359.2173156738281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 358.91033935546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 360.81817626953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 359.31573486328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 359.1911926269531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 358.8359680175781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 359.01904296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 359.5179443359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 360.99981689453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 360.5970458984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 358.82318115234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 360.2359924316406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 360.34375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 359.10333251953125
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 0 21.0 1143.69153516 (15.353965082180355, 9)
loss 359.336669921875
Current State,action,reward,Response time,Next State:  (9, 15.353965082180355) 3 20.0 1226.82184023 (15.836943704090487, 10)
loss 360.1434326171875
Current State,action,reward,Response time,Next State:  (10, 15.836943704090487) 3 19.0 1217.71670884 (16.466876895473597, 11)
loss 359.170166015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 358.8479919433594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 361.59588623046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 359.98626708984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 360.9554138183594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 360.6268310546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 359.2887878417969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 360.08154296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 359.056396484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 359.423583984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 359.1295471191406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 360.91802978515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 358.7916564941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 360.6328430175781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 359.3103942871094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 360.13104248046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 359.56927490234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 359.1813049316406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 361.6925354003906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 359.37005615234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 360.9295349121094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 359.17584228515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 359.73968505859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 359.76263427734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 359.9277648925781
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 4 19.0 1189.84140354 (15.446694946204717, 11)
loss 359.209716796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 360.7409362792969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 359.8918151855469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.64605712890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 358.96978759765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 359.44305419921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 360.3806457519531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 359.1119689941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 359.38995361328125
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 4 19.0 1210.80462626 (16.11465619633363, 11)
loss 359.3192443847656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 360.1167297363281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 360.3614807128906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 360.0753479003906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 359.6015319824219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 359.2159729003906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 359.4396057128906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 359.1013488769531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 359.360595703125
############ Running episode number: 308  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 360.634521484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 359.83197021484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 360.54315185546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 360.0079040527344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 359.2464904785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 360.2452697753906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 359.4381408691406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 359.61102294921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 361.3042907714844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 359.9925231933594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 359.3636169433594
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 1 20.0 939.812260006 (10.768325938188134, 10)
loss 359.57354736328125
Current State,action,reward,Response time,Next State:  (10, 10.768325938188134) 3 19.0 948.856481751 (10.772009508959538, 11)
loss 359.2821350097656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 359.6921691894531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 359.3713073730469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 359.76177978515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 359.7877197265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 359.7554931640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 360.2137145996094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 361.18865966796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 359.7175598144531
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 0 21.0 919.871906942 (10.370942817486826, 9)
loss 360.4015197753906
Current State,action,reward,Response time,Next State:  (9, 10.370942817486826) 3 20.0 965.985215893 (10.42733414151318, 10)
loss 361.3340148925781
Current State,action,reward,Response time,Next State:  (10, 10.42733414151318) 3 19.0 930.768881517 (10.388469398680568, 11)
loss 360.3345642089844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 361.09844970703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 358.8865051269531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 361.350830078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 361.08526611328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 359.5386047363281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 359.6854553222656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 360.1533203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 359.97802734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 361.1959228515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 358.8409118652344
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 1 20.0 909.046654676 (10.236272697871373, 10)
loss 360.3363342285156
Current State,action,reward,Response time,Next State:  (10, 10.236272697871373) 3 19.0 920.634200723 (10.369891240151098, 11)
loss 359.72515869140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 360.2480163574219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 361.1541442871094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 359.74237060546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 361.6671142578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 360.35516357421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 360.9005432128906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 359.26300048828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 360.0293884277344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 359.7184753417969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 359.6844177246094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 360.4511413574219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 359.9305419921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 360.027587890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 358.72686767578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 360.02935791015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 360.10650634765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 361.785888671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 360.06268310546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 359.8263244628906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 360.82952880859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 359.4654541015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 360.7861022949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 359.7411804199219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 359.9875183105469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 360.1365051269531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 361.1670227050781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 360.5807800292969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 359.2136535644531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 361.4747009277344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 360.6276550292969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 359.4854736328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 359.7053527832031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 361.17236328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 359.9935607910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 361.8205261230469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 360.0270690917969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 361.51190185546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 361.4737548828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 359.8790588378906
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 4 19.0 1339.12370397 (17.82724819986867, 11)
loss 360.64495849609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 361.8215026855469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 360.73565673828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 361.0297546386719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 360.62353515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 359.3436279296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 359.4494323730469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 358.7352294921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 361.2742004394531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 360.841552734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 361.58258056640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 362.0761413574219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 359.5284729003906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 360.03839111328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 359.8057861328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 359.7868957519531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 360.7120056152344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 361.6938171386719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 360.1111145019531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 359.7620544433594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 359.84820556640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 362.2590026855469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 360.47772216796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 359.509765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 360.6734313964844
############ Running episode number: 309  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 362.9290466308594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 359.30999755859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 359.3270568847656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 359.23004150390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 361.7262878417969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 359.0828552246094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 359.684326171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 359.5929870605469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 359.84393310546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 358.8370361328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 360.7893371582031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 361.1387634277344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 359.573486328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 362.7013854980469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 360.1060791015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 360.5058288574219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 360.41973876953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 360.30084228515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 359.5794982910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 360.5498046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 360.41229248046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 360.5181579589844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 360.860107421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 358.74505615234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 360.4537048339844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 359.1778259277344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 361.2566223144531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 359.9642639160156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 359.6227111816406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 359.6410217285156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 361.7745361328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 359.9254455566406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 360.74713134765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 359.63250732421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 359.7478942871094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 360.5827331542969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 360.5125732421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 360.07318115234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 359.4977722167969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 360.2362976074219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 359.01922607421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 360.34222412109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 359.668701171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 358.73944091796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 360.0865478515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 359.23272705078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 360.5599060058594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 360.2900695800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 360.0701904296875
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 1 20.0 992.681522335 (12.19918626616789, 10)
loss 359.1162414550781
Current State,action,reward,Response time,Next State:  (10, 12.19918626616789) 3 19.0 1024.75516863 (12.501496275411796, 11)
loss 360.13177490234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 360.26336669921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 359.70330810546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 360.20611572265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 358.7366943359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 360.56500244140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 359.86993408203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 359.008544921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 361.167236328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 359.2696838378906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 361.187744140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 361.0450134277344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 361.0585632324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 360.04437255859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 360.4125061035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 358.78662109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 358.97637939453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 359.6492614746094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 359.9288024902344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 359.2586364746094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 360.4103698730469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 359.1953430175781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 361.5132751464844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 359.1667175292969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 359.53826904296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 360.40765380859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 360.8211669921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 358.78765869140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 360.8182373046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 361.4591064453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 360.81494140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 360.3047180175781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 358.8062438964844
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 4 19.0 1184.33851965 (15.750501603468638, 11)
loss 359.6611328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 358.8788146972656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.65496826171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 359.6561584472656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 359.3685607910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 359.39605712890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 358.74176025390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 360.5712585449219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 358.76220703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 361.23028564453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 360.9693603515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 358.69403076171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 361.35894775390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 359.51080322265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 359.49102783203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 358.795654296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 359.4165344238281
############ Running episode number: 310  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 360.4165954589844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 359.83770751953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 359.6247863769531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 359.1275329589844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 360.0334777832031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 360.8448486328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 360.6581115722656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 358.8591613769531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 360.5409851074219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 360.47149658203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 359.95001220703125
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 4 19.0 939.812260006 (10.768325938188134, 11)
loss 359.9883728027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 359.7685546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 360.10205078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 361.0946044921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 360.3172607421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 359.552001953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 360.3929138183594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 359.7579040527344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 359.2275390625
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 0 21.0 919.412094444 (10.44185150623065, 9)
loss 359.36102294921875
Current State,action,reward,Response time,Next State:  (9, 10.44185150623065) 3 20.0 969.696935814 (10.370942817486826, 10)
loss 360.3629455566406
Current State,action,reward,Response time,Next State:  (10, 10.370942817486826) 3 19.0 927.777654938 (10.42733414151318, 11)
loss 360.07861328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 361.2189636230469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 359.9280090332031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 363.116455078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 361.3267822265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 360.0605163574219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 362.0293273925781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 360.1146545410156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 360.4823913574219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 360.9093933105469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 358.9054870605469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 360.3275146484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 360.67431640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 361.0367736816406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 360.0145263671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 359.9683837890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 360.1383972167969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 360.0225524902344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 359.92108154296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 359.62286376953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 360.6488952636719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 359.6790466308594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 361.20538330078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 359.8140563964844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 360.9161376953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 360.0947570800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 361.19696044921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 360.39959716796875
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 1 20.0 1012.73322757 (12.501496275411796, 10)
loss 359.9996032714844
Current State,action,reward,Response time,Next State:  (10, 12.501496275411796) 1 21.0 1040.79092857 (13.168618569876575, 9)
loss 361.3134765625
Current State,action,reward,Response time,Next State:  (9, 13.168618569876575) 3 20.0 1112.429735 (13.649658108197247, 10)
loss 358.89434814453125
Current State,action,reward,Response time,Next State:  (10, 13.649658108197247) 0 22.0 1101.69413046 (14.283719188889453, 8)
loss 359.4520263671875
Current State,action,reward,Response time,Next State:  (8, 14.283719188889453) 3 21.0 1205.85251983 (14.677479537099185, 9)
loss 359.3400573730469
Current State,action,reward,Response time,Next State:  (9, 14.677479537099185) 3 20.0 1191.41116041 (15.353965082180355, 10)
loss 361.3942565917969
Current State,action,reward,Response time,Next State:  (10, 15.353965082180355) 3 19.0 1192.09754638 (15.836943704090487, 11)
loss 359.0628356933594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 360.3608703613281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 359.9156799316406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 360.3440856933594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 359.5622253417969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 361.5490417480469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 359.6617431640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 360.54986572265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 360.9352722167969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 359.7379455566406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 360.29229736328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 358.6355285644531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 360.6148376464844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 360.8671569824219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 360.111572265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 362.8572082519531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 360.5291748046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 361.17169189453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 361.2050476074219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 361.3464660644531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 360.4836120605469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 360.46380615234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 361.5969543457031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 359.9540100097656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 361.3170471191406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 360.09564208984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 359.2145080566406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 360.9035339355469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 360.76910400390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 359.72412109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 361.7564697265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 360.72784423828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 359.5273132324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 360.07080078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 359.86834716796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 359.5898742675781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 362.7667236328125
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 0 21.0 1221.34827555 (16.229253414601111, 9)
loss 360.9580078125
Current State,action,reward,Response time,Next State:  (9, 16.229253414601111) 3 20.0 1272.63886489 (16.295120821876548, 10)
loss 360.8908386230469
Current State,action,reward,Response time,Next State:  (10, 16.295120821876548) 3 19.0 1242.02029803 (16.667936385136993, 11)
loss 360.87322998046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 361.8057861328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 360.0920715332031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 362.30145263671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 360.8743591308594
############ Running episode number: 311  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 360.4361877441406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 359.0260314941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 361.3302307128906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 362.55615234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 362.2198486328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 362.0811767578125
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 4 19.0 962.898956888 (11.027107764209074, 11)
loss 360.8674621582031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 360.328369140625
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 0 21.0 949.137050055 (10.931193889570471, 9)
loss 360.2933654785156
Current State,action,reward,Response time,Next State:  (9, 10.931193889570471) 3 20.0 995.311594677 (10.816918347608043, 10)
loss 360.2415466308594
Current State,action,reward,Response time,Next State:  (10, 10.816918347608043) 3 19.0 951.434021987 (10.819208572963639, 11)
loss 359.0609436035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 359.3599548339844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 359.6702880859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 362.01824951171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 360.2454833984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 360.5439147949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 359.231201171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 360.2103271484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 359.8133239746094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 361.0191650390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 359.56719970703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 360.894287109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 361.4930725097656
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 0 21.0 919.104778396 (10.388469398680568, 9)
loss 359.6436767578125
Current State,action,reward,Response time,Next State:  (9, 10.388469398680568) 3 20.0 966.902645924 (10.344006106602812, 10)
loss 360.73358154296875
Current State,action,reward,Response time,Next State:  (10, 10.344006106602812) 3 19.0 926.348821567 (10.319026962956018, 11)
loss 359.7825927734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 362.5395812988281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 360.9354248046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 360.4129943847656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 360.22357177734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 360.0769348144531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 359.2070617675781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 359.7261962890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 360.6899108886719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 361.8839416503906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 362.65771484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 361.32000732421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 360.88916015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 360.17242431640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 360.1088562011719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 359.59661865234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 359.3804016113281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 360.4327087402344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 360.0228576660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 360.027099609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 361.1654357910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 360.91412353515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 360.7095642089844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 361.0019226074219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 361.14727783203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 359.6186828613281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 360.5317687988281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 359.9612121582031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 360.6101379394531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 362.1268615722656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 361.1614990234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 361.4841613769531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 360.90386962890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 360.709716796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 361.0081787109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 362.4592590332031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 359.28436279296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 359.5093994140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 360.9690856933594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 361.387451171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 360.9127197265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 362.37939453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 358.72369384765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 360.2047119140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 362.2129821777344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 360.2074279785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 359.8516540527344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 358.8684997558594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 361.6938781738281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 359.78466796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 360.26129150390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 361.0207824707031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 363.8882141113281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 359.7361145019531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 362.7494201660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 360.2587585449219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 360.58892822265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 361.9951477050781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 359.91925048828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 360.8109130859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 363.0499572753906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 359.45648193359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 359.89227294921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 360.02716064453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 360.55706787109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 361.0485534667969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 359.7809753417969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 359.17327880859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 360.4253234863281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 361.4171142578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 360.9175720214844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 360.91973876953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 362.105224609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 360.1358337402344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 361.1546630859375
############ Running episode number: 312  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 361.3216857910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 360.168701171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 361.1530456542969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 361.2960205078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 361.0109558105469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 358.705322265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 361.9844665527344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 361.0155029296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 361.9708557128906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 360.5013732910156
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 0 21.0 939.691239608 (10.819208572963639, 9)
loss 362.17132568359375
Current State,action,reward,Response time,Next State:  (9, 10.819208572963639) 3 20.0 989.449716 (10.768325938188134, 10)
loss 358.4928894042969
Current State,action,reward,Response time,Next State:  (10, 10.768325938188134) 3 19.0 948.856481751 (10.772009508959538, 11)
loss 360.6535339355469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 361.20263671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 363.0532531738281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 361.96337890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 361.50726318359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 362.2315368652344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 362.78961181640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 360.84783935546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 359.336669921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 362.88946533203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 360.5927734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 362.31829833984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 360.42083740234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 362.4596252441406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 360.6331481933594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 359.0975646972656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 360.60931396484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 359.85089111328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 361.6700744628906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 360.5957946777344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 361.8757629394531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 361.99578857421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 360.5834655761719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 360.2819519042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 360.5240783691406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 359.8338928222656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 361.4588623046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 359.97430419921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 359.65606689453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 359.8570861816406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 362.7963562011719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 359.2362060546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 361.53729248046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 361.8971252441406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 360.787353515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 360.5406799316406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 359.882568359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 361.8508605957031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 359.21600341796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 360.60308837890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 360.8774108886719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 361.4213562011719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 360.1999816894531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 359.7974548339844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 359.40576171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 360.6634826660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 359.0346374511719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 360.45880126953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 360.3624267578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 359.4264221191406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 359.5126647949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 360.5019836425781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 358.79327392578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 359.2167053222656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 359.73284912109375
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 1 20.0 1390.09363446 (19.213467265587269, 10)
loss 359.5672607421875
Current State,action,reward,Response time,Next State:  (10, 19.213467265587269) 3 19.0 1396.82133527 (19.140765783401285, 11)
loss 360.3080139160156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 360.0639343261719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 359.74102783203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 359.81964111328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 360.83868408203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 360.4545593261719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 360.1042175292969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 360.6904296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 358.8561706542969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 360.1168212890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 360.9707946777344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 361.8306884765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 360.8833923339844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 359.73504638671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 360.8542175292969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 363.5384826660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 360.45135498046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 361.013427734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 359.4320068359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 360.373291015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 361.1802673339844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 361.6288757324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 361.52520751953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 360.8508605957031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 360.1554260253906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 361.4778747558594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 362.7562561035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 360.4758605957031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 362.71014404296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 360.0318298339844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 361.1365966796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 363.3557434082031
############ Running episode number: 313  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 360.9323425292969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 361.3305358886719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 360.18438720703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 362.1383056640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 361.17425537109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 360.5362243652344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 361.0050354003906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 359.1190490722656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 361.1712646484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 359.0214538574219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 360.03631591796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 360.60699462890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 360.55572509765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 360.2142028808594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 361.04437255859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 358.99444580078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 359.5193176269531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 360.1079406738281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 361.4916076660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 359.61224365234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 362.77972412109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 359.24151611328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 363.1080322265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 358.8758239746094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 359.2359924316406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 360.1298522949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 361.19952392578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 360.3912048339844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 362.09906005859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 359.700927734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 360.4469909667969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 360.91033935546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 361.0820617675781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 360.7733459472656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 360.66162109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 359.9320373535156
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 2 19.0 916.069372847 (10.316955310454549, 11)
loss 361.21539306640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 361.68017578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 360.5170593261719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 359.96240234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 362.021728515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 360.646240234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 359.7741394042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 361.66741943359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 361.4713439941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 360.7952880859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 362.062744140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 360.1015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 359.6083068847656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 361.7371826171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 359.254638671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 361.61773681640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 360.0084228515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 360.8955383300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 360.100341796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 362.7078552246094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 359.7113952636719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 358.9539489746094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 359.9642028808594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 360.1668395996094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 359.420654296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 362.050537109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 360.001953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 359.3232421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 359.914306640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 362.1691589355469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 362.98529052734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 361.1091003417969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 359.5416259765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 358.8262939453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 359.2200622558594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 360.299560546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 360.3053283691406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 361.64923095703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 360.7460632324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 360.0028381347656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 359.4840393066406
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 0 21.0 1278.56065924 (16.84211602880065, 9)
loss 362.646240234375
Current State,action,reward,Response time,Next State:  (9, 16.84211602880065) 3 20.0 1304.71919827 (16.237094554670044, 10)
loss 359.7906188964844
Current State,action,reward,Response time,Next State:  (10, 16.237094554670044) 3 19.0 1238.94234737 (15.950694610794756, 11)
loss 359.5939025878906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 360.1543884277344
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 4 19.0 1204.52470225 (15.550833128512703, 11)
loss 361.9295654296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 360.308349609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 362.9913635253906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 361.515869140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 361.1766357421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 359.9954528808594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 359.4088134765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 360.42535400390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 360.1197204589844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 359.8126525878906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 360.1671142578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 362.58819580078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 360.8477783203125
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 1 20.0 1225.69057988 (16.295120821876548, 10)
loss 359.4259338378906
Current State,action,reward,Response time,Next State:  (10, 16.295120821876548) 3 19.0 1242.02029803 (16.667936385136993, 11)
loss 359.459228515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 361.06732177734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 362.52520751953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 361.0988464355469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 360.51031494140625
############ Running episode number: 314  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 359.1271057128906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 361.2312927246094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 360.53997802734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 360.841796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 361.0063781738281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 361.95428466796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 359.53155517578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 360.3660888671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 359.830322265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 361.1054382324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 362.3280944824219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 362.0234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 362.30609130859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 361.58758544921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 361.3568420410156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 359.64349365234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 362.27276611328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 359.3127136230469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 360.0960388183594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 360.91876220703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 360.6432800292969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 359.0539245605469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 360.8681640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 359.9756774902344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 361.41229248046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 359.73944091796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 360.5738830566406
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 1 20.0 912.494916918 (10.278181486298042, 10)
loss 362.28985595703125
Current State,action,reward,Response time,Next State:  (10, 10.278181486298042) 3 19.0 922.857214352 (10.268274366284802, 11)
loss 360.6158447265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 361.2411804199219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 359.64495849609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 359.3565979003906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 362.0834655761719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 361.6194152832031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 361.4530944824219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 362.81756591796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 359.6170349121094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 360.3821105957031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 361.2609558105469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 361.6454772949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 360.11920166015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 359.7393493652344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 359.924560546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 360.87640380859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 360.94970703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 360.157470703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 360.50054931640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 359.5989685058594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 360.86376953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 360.90203857421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 360.29583740234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 361.2958679199219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 361.060302734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 361.2821960449219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 359.3461608886719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 361.2813415527344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 361.44091796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 359.4668884277344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 360.32830810546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 361.062255859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 360.1339416503906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 359.857666015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 359.549072265625
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 0 21.0 1339.64749699 (18.671267839956315, 9)
loss 359.9251403808594
Current State,action,reward,Response time,Next State:  (9, 18.671267839956315) 3 20.0 1400.46626871 (19.02839494033929, 10)
loss 360.82940673828125
Current State,action,reward,Response time,Next State:  (10, 19.02839494033929) 3 19.0 1387.00434183 (19.286321916040979, 11)
loss 359.1531677246094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 360.1304931640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 359.5637512207031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 359.831787109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 360.2774353027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 362.4487609863281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 361.11053466796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 360.0005798339844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 360.10125732421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 359.1585693359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 359.8617248535156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 361.78753662109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 359.8475036621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 361.74151611328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 361.2968444824219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 362.1451416015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 361.3835754394531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 359.500244140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 360.27728271484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 360.4501647949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 360.1636962890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 359.6654052734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 360.6031188964844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 361.1466369628906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 361.36798095703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 361.3392639160156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 361.1183776855469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 362.1092529296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 359.9449462890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 362.0133056640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 360.0381774902344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 359.835693359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 362.5014343261719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 360.6318359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 361.7887268066406
############ Running episode number: 315  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 361.54119873046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 359.9882507324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 360.84234619140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 360.0138244628906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 359.8642578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 361.3755187988281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 362.0591125488281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 359.4604797363281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 360.43572998046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 360.9683837890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 361.6662902832031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 362.0505065917969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 359.657470703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 361.2799377441406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 360.6191711425781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 360.8750305175781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 361.38494873046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 361.23675537109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 359.6624450683594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 361.5197448730469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 361.494384765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 363.7118225097656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 360.2630615234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 361.81524658203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 359.4427490234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 361.26348876953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 361.4412536621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 361.1017761230469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 362.1668701171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 360.2264404296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 359.3219909667969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 362.4738464355469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 362.17950439453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 360.49676513671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 362.1562805175781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 359.6614990234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 360.16619873046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 359.4703063964844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 358.74652099609375
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 1 20.0 917.140709305 (10.425974763084863, 10)
loss 359.95587158203125
Current State,action,reward,Response time,Next State:  (10, 10.425974763084863) 3 19.0 930.696774523 (10.546025383098053, 11)
loss 359.9162902832031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 359.66058349609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 359.92401123046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 361.401123046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 359.560546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 358.9862060546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 360.2142028808594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 361.2555236816406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 359.7077941894531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 360.1382751464844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 359.0306396484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 361.4928894042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 360.8018798828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 360.17681884765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 358.8802795410156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 359.0694580078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 362.0140380859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 359.1592712402344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 359.4945373535156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 360.5480651855469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 360.1307678222656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 362.8011474609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 358.86126708984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 359.5155334472656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 361.28497314453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 360.1843566894531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 359.24609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 360.2103576660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 362.1594543457031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 359.3489990234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 361.4855651855469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 360.705322265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 361.56640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 359.2318420410156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 359.6833801269531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 361.1308898925781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 359.8729553222656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 360.1550598144531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 359.15167236328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 360.4237365722656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 362.80401611328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 361.1825866699219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 359.304443359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 360.3902893066406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 359.7309875488281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 360.3932189941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 361.80963134765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 360.6889343261719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 361.8458557128906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 360.80377197265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 360.4920654296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 361.244140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 358.9004821777344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 360.75860595703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 359.5716857910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 361.180908203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 360.7481689453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 359.94329833984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 360.20684814453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 359.005859375
############ Running episode number: 316  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 360.1885070800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 359.5517272949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 361.13201904296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 359.5299987792969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 362.2336120605469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 361.56640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 360.4346008300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 360.2367858886719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 361.2810363769531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 359.2745056152344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 360.3120422363281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 361.31365966796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 361.0897521972656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 360.5533142089844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 359.8293762207031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 360.99700927734375
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 1 20.0 925.738299342 (10.553846649940214, 10)
loss 361.0809326171875
Current State,action,reward,Response time,Next State:  (10, 10.553846649940214) 3 19.0 937.479622653 (10.489125480251131, 11)
loss 360.3673400878906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 362.5448303222656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 359.5794982910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 360.7074279785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 360.4942626953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 359.2197570800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 360.7286071777344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 362.85235595703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 359.53173828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 361.8779602050781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 362.65936279296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 361.6112976074219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 359.63629150390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 360.7239074707031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 363.85430908203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 359.54888916015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 360.16937255859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 360.1202392578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 359.8788757324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 358.90472412109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 360.98699951171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 359.6261901855469
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 0 21.0 917.140709305 (10.425974763084863, 9)
loss 359.2920227050781
Current State,action,reward,Response time,Next State:  (9, 10.425974763084863) 3 20.0 968.865866662 (10.546025383098053, 10)
loss 360.0482177734375
Current State,action,reward,Response time,Next State:  (10, 10.546025383098053) 3 19.0 937.064750655 (10.655373370049301, 11)
loss 361.90362548828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 359.9303894042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 361.2571716308594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 359.828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 360.6417541503906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 361.5299072265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 360.4502868652344
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 2 19.0 984.787563682 (11.819721938468785, 11)
loss 360.9000549316406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 360.8249816894531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 359.6404113769531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 360.7666320800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 360.6919860839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 362.7729797363281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 361.34564208984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 360.6667785644531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 361.1450500488281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 360.8255310058594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 359.176025390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 359.90911865234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 360.3228759765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 360.9986572265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 361.4582824707031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 360.61962890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 358.89794921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 361.1622009277344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 361.53753662109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 359.9404296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 361.2104797363281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 359.5135192871094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 359.7064208984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 360.2262268066406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 359.1793212890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 360.7154846191406
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 2 19.0 1354.56874896 (18.375894992990247, 11)
loss 361.2127380371094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 362.5985412597656
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 4 19.0 1310.13203606 (17.229782241685768, 11)
loss 358.922119140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 360.45037841796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 360.3402099609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 359.2079772949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 361.26751708984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 362.3583984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 358.9532470703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 359.90264892578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 359.1346130371094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 360.13079833984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 361.3320007324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 360.3663635253906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 359.7213134765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 360.72357177734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 360.91986083984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 362.7042541503906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 359.7454833984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 362.81341552734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 362.9041442871094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 360.3752746582031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 359.68487548828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 360.7119445800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 358.89154052734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 361.877197265625
############ Running episode number: 317  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 360.61376953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 359.3868103027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 361.5015869140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 362.6201477050781
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 4 19.0 974.154538113 (11.336751742492702, 11)
loss 361.09857177734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 361.6086120605469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 360.0545349121094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 359.260986328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 359.65625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 362.4612121582031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 360.0487060546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 360.17974853515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 361.71148681640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 360.469482421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 359.91455078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 360.0674133300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 361.1598205566406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 360.2485046386719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 360.7281494140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 360.52288818359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 361.7207946777344
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 2 19.0 919.871906942 (10.370942817486826, 11)
loss 359.3271789550781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 360.0130310058594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 360.08734130859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 359.91339111328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 360.7505187988281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 360.86102294921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 360.90618896484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 362.6144104003906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 358.4635925292969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 361.8201904296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 360.25360107421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 361.72021484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 359.7490539550781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 360.3527526855469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 359.2887878417969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 361.3826904296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 359.7652282714844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 361.0506896972656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 360.5692138671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 360.51043701171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 359.1174011230469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 359.1033630371094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 360.2202453613281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 359.0208740234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 360.9938049316406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 359.2721862792969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 360.8975524902344
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 1 20.0 984.787563682 (11.819721938468785, 10)
loss 361.6111145019531
Current State,action,reward,Response time,Next State:  (10, 11.819721938468785) 3 19.0 1004.62682792 (12.19918626616789, 11)
loss 360.4883117675781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 360.9320983886719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 364.16558837890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 360.92901611328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 360.0554504394531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 360.4980163574219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 359.33575439453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 361.2119445800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 361.0397644042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 360.9105224609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 359.2323913574219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 362.6652526855469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 359.15716552734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 361.8568115234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 361.2295837402344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 361.2770690917969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 358.8699951171875
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 2 19.0 1387.23260634 (19.340464848017284, 11)
loss 361.4645080566406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 361.0618896484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 359.5184631347656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 360.1263122558594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 359.6137390136719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 361.1976013183594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 359.78692626953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 362.1358642578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 361.4001770019531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 361.74884033203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 359.7904357910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 358.8882751464844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 361.8768310546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 361.76556396484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 360.2916564941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 360.1665344238281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 360.5701904296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 361.1183166503906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 360.2078857421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 359.4717712402344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 362.4749450683594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 359.1119384765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 360.0656433105469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 361.4712829589844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 361.4676818847656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 359.24420166015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 363.8869323730469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 359.25274658203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 359.7782287597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 359.11859130859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 359.2017822265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 360.6476135253906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 359.361572265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 359.1900329589844
############ Running episode number: 318  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 361.88995361328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 360.0628356933594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 361.4716796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 359.2279968261719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 360.30914306640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 359.6207275390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 359.0810546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 362.51202392578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 361.1423034667969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 360.7489929199219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 361.9783020019531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 360.2594299316406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 361.65985107421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 361.2498474121094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 361.39385986328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 362.24725341796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 361.9989929199219
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 2 19.0 925.789969445 (10.489125480251131, 11)
loss 359.6568298339844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 360.01531982421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 360.4009704589844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 361.8624267578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 359.04766845703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 361.94696044921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 359.8320617675781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 360.5744934082031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 359.2319641113281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 360.8041076660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 360.1483154296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 359.9167175292969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 359.2174072265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 360.43109130859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 361.59234619140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 362.2909240722656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 358.8679504394531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 359.9653015136719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 360.8522033691406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 360.5323181152344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 360.85675048828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 360.14801025390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 360.362548828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 362.0848693847656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 359.1304016113281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 359.8400573730469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 358.7980651855469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 360.9771728515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 362.5022888183594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 359.9589538574219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 360.02117919921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 361.0442199707031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 360.2427673339844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 360.4584045410156
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 1 20.0 1028.70793389 (13.168618569876575, 10)
loss 362.8907165527344
Current State,action,reward,Response time,Next State:  (10, 13.168618569876575) 2 20.0 1076.17782493 (13.649658108197247, 10)
loss 361.0677185058594
Current State,action,reward,Response time,Next State:  (10, 13.649658108197247) 3 19.0 1101.69413046 (14.283719188889453, 11)
loss 359.9011535644531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 361.27264404296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 358.9467468261719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 360.567138671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 359.60968017578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 360.7166748046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 361.4297790527344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 360.7865905761719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 359.6199951171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 360.0904235839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 359.3861083984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 360.814697265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 362.5757141113281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 360.2564697265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 361.1995544433594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 361.6730651855469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 360.574462890625
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 1 20.0 1392.48057747 (19.223969507401588, 10)
loss 360.424560546875
Current State,action,reward,Response time,Next State:  (10, 19.223969507401588) 3 19.0 1397.37841716 (19.25591252280865, 11)
loss 361.8714294433594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 360.2603759765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 361.65850830078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 360.72381591796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 361.5284729003906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 361.7541198730469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 361.2244567871094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 359.9013366699219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 360.7668151855469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 361.63116455078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 360.0440673828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 359.6144104003906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 360.07232666015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 362.0294189453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 360.1778869628906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 361.2394714355469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 359.809814453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 359.5908508300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 360.9359436035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 359.91455078125
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 2 19.0 1210.80462626 (16.11465619633363, 11)
loss 361.7582092285156
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 1 20.0 1219.63501821 (16.147078378791146, 10)
loss 360.5475158691406
Current State,action,reward,Response time,Next State:  (10, 16.147078378791146) 3 19.0 1234.16752106 (16.229253414601111, 11)
loss 362.8664855957031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 358.8890686035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 363.0386962890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 361.99066162109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 361.3935241699219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 360.2669982910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 360.6662292480469
############ Running episode number: 319  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 361.4595031738281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 360.799560546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 361.2223205566406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 361.2640380859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 360.6219787597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 361.6885681152344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 361.0203552246094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 360.143798828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 361.77203369140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 360.04833984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 361.7607421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 360.9355773925781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 361.4807434082031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 361.5545349121094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 359.6371154785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 362.32891845703125
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 1 20.0 925.738299342 (10.553846649940214, 10)
loss 360.5431213378906
Current State,action,reward,Response time,Next State:  (10, 10.553846649940214) 3 19.0 937.479622653 (10.489125480251131, 11)
loss 362.8834228515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 361.7491760253906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 360.5075988769531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 360.01531982421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 362.0173034667969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 360.3682861328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 360.1952819824219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 360.8298034667969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 360.7596130371094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 359.7684326171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 359.9306945800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 359.7076110839844
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 1 20.0 910.69972028 (10.335411397720526, 10)
loss 358.8899841308594
Current State,action,reward,Response time,Next State:  (10, 10.335411397720526) 3 19.0 925.892923039 (10.305649118067803, 11)
loss 360.6369934082031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 361.3455810546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 360.52520751953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 361.833251953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 359.6057434082031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 360.70263671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 361.3323669433594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 360.3263244628906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 361.14544677734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 360.305908203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 360.10986328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 360.7661437988281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 361.00347900390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 360.579833984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 360.8149108886719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 361.3918151855469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 361.6058349609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 360.73089599609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 363.2393798828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 361.5567321777344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 360.5264587402344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 361.56903076171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 361.6734313964844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 358.9451599121094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 362.940673828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 360.2773132324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 360.3088684082031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 361.6405944824219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 360.575927734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 360.246826171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 363.2235412597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 360.5771179199219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 361.8603210449219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 361.2377014160156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 360.9098205566406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 360.31781005859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 360.874267578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 362.38958740234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 360.92840576171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 359.8751220703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 362.2340087890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 360.4862060546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 361.3468322753906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 360.29144287109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 358.6677551269531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 362.3219299316406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 359.41790771484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 360.859619140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 359.68121337890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 360.9556884765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 360.0444030761719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 360.376220703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 360.56658935546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 361.0331726074219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 360.702392578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 359.7289733886719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 360.0041198730469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 359.7172546386719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 359.7037658691406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 361.4762268066406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 361.0163269042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 360.572998046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 360.17535400390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 358.7639465332031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 360.54449462890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 360.8169860839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 361.4183654785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 361.0346984863281
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 2 19.0 1258.27117243 (17.052961248403161, 11)
loss 361.32183837890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 361.1234436035156
############ Running episode number: 320  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 360.798095703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 360.069580078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 361.7583923339844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 360.0196533203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 360.02410888671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 360.54376220703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 360.08294677734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 359.598388671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 360.3391418457031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 360.9969482421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 359.40728759765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 361.6205749511719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 360.9225769042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 359.8224182128906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 361.4243469238281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 359.94146728515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 359.7970886230469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 361.5109558105469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 362.2201843261719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 360.0397644042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 360.8108825683594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 359.44464111328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 359.9146423339844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 359.4075927734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 361.2657165527344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 360.7938537597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 361.841552734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 361.8425598144531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 360.8473205566406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 361.7323303222656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 359.90545654296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 361.7636413574219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 361.38543701171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 362.1036376953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 360.5419921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 360.1851806640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 359.728515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 361.3691101074219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 359.8660583496094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 361.2314453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 358.9141845703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 361.58746337890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 359.45220947265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 360.8804626464844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 360.8741149902344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 361.9577331542969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 360.8699951171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 360.12396240234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 361.03094482421875
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 0 21.0 992.681522335 (12.19918626616789, 9)
loss 360.9798889160156
Current State,action,reward,Response time,Next State:  (9, 12.19918626616789) 3 20.0 1061.68473805 (12.501496275411796, 10)
loss 361.57598876953125
Current State,action,reward,Response time,Next State:  (10, 12.501496275411796) 3 19.0 1040.79092857 (13.168618569876575, 11)
loss 360.1952209472656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 361.1112976074219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 361.00848388671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 359.672119140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 361.1101379394531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 359.0198059082031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 359.9625549316406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 360.3331604003906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 360.04986572265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 359.3338623046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 360.1265563964844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 360.8863830566406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 362.03057861328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 361.0726318359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 361.0895690917969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 361.66888427734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 361.8935546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 359.5279541015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 359.4733581542969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 359.42822265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 359.51373291015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 360.4030456542969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 360.5867614746094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 359.07537841796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 359.8752746582031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 360.3092346191406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 359.9806823730469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 359.986572265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 360.4594421386719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 360.16375732421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 358.9609069824219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 360.8287048339844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 359.6758728027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 360.85308837890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 361.380126953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 359.1429748535156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 361.5970764160156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 360.13458251953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 359.5063171386719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 360.06658935546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 361.2108459472656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 360.322509765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 360.2357482910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 360.2845458984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 360.2563171386719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 358.7666320800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 360.110107421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 359.1224670410156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 359.7764892578125
############ Running episode number: 321  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 361.5462646484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 360.04742431640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 360.8981628417969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 361.24688720703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 360.5342712402344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 361.2380065917969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 358.8552551269531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 361.956787109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 359.4254455566406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 361.8236389160156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 359.34381103515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 359.0323181152344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 359.8459167480469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 359.7107238769531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 359.1076965332031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 360.5211181640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 361.6354064941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 359.7457580566406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 359.90142822265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 360.4940490722656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 360.74566650390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 360.35894775390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 359.843505859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 360.9853820800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 359.3915100097656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 359.5548400878906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 359.3165588378906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 359.22186279296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 360.2539367675781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 359.29461669921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 361.139404296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 359.9463195800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 358.97113037109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 360.5708312988281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 360.611328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 359.6489562988281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 360.2405700683594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 359.4281921386719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 359.1778869628906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 360.0688171386719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 359.5152282714844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 359.09967041015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 360.9580078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 359.70361328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 360.4139404296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 360.5235900878906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 362.0486145019531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 359.9803161621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 360.7225341796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 359.11126708984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 360.0939025878906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 359.857666015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 359.2382507324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 359.1786804199219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 359.3196716308594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 359.1540222167969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 360.9456481933594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 360.6499328613281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 358.8178405761719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 359.1275634765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 360.05987548828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 360.9734802246094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 360.7929382324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 360.4303283691406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 361.0201721191406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 358.8576965332031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 360.55487060546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 360.7775573730469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 360.0634460449219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 358.8121643066406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 359.7041931152344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 359.5215148925781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 359.4812927246094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 359.95843505859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 359.1734619140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 361.9242248535156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 359.9534606933594
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 0 21.0 1278.56065924 (16.84211602880065, 9)
loss 359.9595642089844
Current State,action,reward,Response time,Next State:  (9, 16.84211602880065) 3 20.0 1304.71919827 (16.237094554670044, 10)
loss 359.01654052734375
Current State,action,reward,Response time,Next State:  (10, 16.237094554670044) 3 19.0 1238.94234737 (15.950694610794756, 11)
loss 359.9201354980469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 360.2461242675781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 359.50244140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 359.8441162109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 361.2876281738281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 360.4382629394531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 359.1647033691406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 359.4698791503906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 359.6778869628906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 361.0029296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 358.95550537109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 360.3221130371094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 361.7931213378906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 361.29302978515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 359.9840087890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 359.3299865722656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 360.4307861328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 359.2241516113281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 359.81573486328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 359.2292175292969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 358.9493713378906
############ Running episode number: 322  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 359.507568359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 359.6656799316406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 359.3033752441406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 359.7958068847656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 359.43585205078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 360.6611633300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 358.8358459472656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 359.4810791015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 359.3869934082031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 359.9600830078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 360.24041748046875
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 1 20.0 939.812260006 (10.768325938188134, 10)
loss 359.2080383300781
Current State,action,reward,Response time,Next State:  (10, 10.768325938188134) 3 19.0 948.856481751 (10.772009508959538, 11)
loss 359.62091064453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 359.0869140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 362.1346740722656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 359.72430419921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 360.2608337402344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 359.8008728027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 360.0329895019531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 361.1017761230469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 359.24676513671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 360.5185546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 359.68560791015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 359.8695068359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 360.1913146972656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 360.6502990722656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 360.7150573730469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 359.5848083496094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 359.20037841796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 359.851318359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 359.2308044433594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 359.02850341796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 360.05609130859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 359.1158142089844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 358.7866516113281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 359.2729797363281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 359.34442138671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 360.56072998046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 360.5023498535156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 358.9579162597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 359.1148986816406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 359.4614562988281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 359.9556579589844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 361.3292236328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 361.4491271972656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 359.2920227050781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 360.83837890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 361.3771667480469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 358.8688659667969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 359.36279296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 359.51043701171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 359.82220458984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 358.8587646484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 359.5189514160156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 359.8230895996094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 359.7836608886719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 359.3436584472656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 359.6656799316406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 359.2353210449219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 360.514404296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 359.5999450683594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 361.2384338378906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 359.21142578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 360.7284240722656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 360.3894348144531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 359.884765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 361.40142822265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 360.0863342285156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 360.2256164550781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 359.4908142089844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 361.7162170410156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 360.0480041503906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 358.8385314941406
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 1 20.0 1376.52055872 (18.668181536495972, 10)
loss 359.1591796875
Current State,action,reward,Response time,Next State:  (10, 18.668181536495972) 3 19.0 1367.89714889 (18.375894992990247, 11)
loss 359.8660583496094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 359.92376708984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 359.9991455078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 358.7962341308594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.9864196777344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 359.6361389160156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 359.2806701660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 360.4161071777344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 360.0586853027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 359.7327880859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 359.48748779296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 359.04254150390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 360.2181091308594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 359.29913330078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 360.446044921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 360.70269775390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 360.4566955566406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 360.5957946777344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 359.7594299316406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 359.2214660644531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 359.36419677734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 360.9988708496094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 360.42156982421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 358.90838623046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 358.58514404296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 361.30426025390625
############ Running episode number: 323  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 359.3543701171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 359.4931945800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 360.3162841796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 359.9261779785156
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 1 20.0 974.154538113 (11.336751742492702, 10)
loss 359.4377136230469
Current State,action,reward,Response time,Next State:  (10, 11.336751742492702) 3 19.0 979.00811241 (11.25610796929319, 11)
loss 359.7101745605469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 360.64556884765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 360.411865234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 360.09625244140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 360.27581787109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 359.784912109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 358.5535888671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 360.5302429199219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 360.016845703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 360.28515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 359.9784240722656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 359.5467529296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 359.0205078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 358.8135986328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 358.9947509765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 359.5401611328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 361.0025939941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 360.247314453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 358.9363708496094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 360.2319030761719
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 0 21.0 914.701547126 (10.319026962956018, 9)
loss 359.69464111328125
Current State,action,reward,Response time,Next State:  (9, 10.319026962956018) 3 20.0 963.267677113 (10.30224719189987, 10)
loss 360.45977783203125
Current State,action,reward,Response time,Next State:  (10, 10.30224719189987) 3 19.0 924.133757854 (10.278181486298042, 11)
loss 359.3512878417969
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 0 21.0 911.223233653 (10.268274366284802, 9)
loss 359.8750915527344
Current State,action,reward,Response time,Next State:  (9, 10.268274366284802) 3 20.0 960.611029141 (10.335411397720526, 10)
loss 360.6643981933594
Current State,action,reward,Response time,Next State:  (10, 10.335411397720526) 3 19.0 925.892923039 (10.305649118067803, 11)
loss 359.4322509765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 360.7900695800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 361.4529724121094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 360.5591125488281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 360.5462951660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 359.9399719238281
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 4 19.0 916.069372847 (10.316955310454549, 11)
loss 360.7586364746094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 359.3050231933594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 359.8279113769531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 360.4907531738281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 360.0802001953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 360.2796630859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 359.50750732421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 359.1512451171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 360.01995849609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 359.42779541015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 360.3387145996094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 360.73956298828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 360.589111328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 359.901611328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 359.7016906738281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 359.32147216796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 360.0334777832031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 361.8027648925781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 359.2235412597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 361.392578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 361.44390869140625
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 1 20.0 1204.9600972 (16.466876895473597, 10)
loss 360.3139343261719
Current State,action,reward,Response time,Next State:  (10, 16.466876895473597) 3 19.0 1251.130943 (16.871606159345866, 11)
loss 359.6064147949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 359.92376708984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 360.2939453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 360.0951843261719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 359.7872619628906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 358.9734802246094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 361.4972839355469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 359.74407958984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 360.46917724609375
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 0 21.0 1390.09363446 (19.213467265587269, 9)
loss 359.9300842285156
Current State,action,reward,Response time,Next State:  (9, 19.213467265587269) 3 20.0 1428.84773289 (19.140765783401285, 10)
loss 360.8524475097656
Current State,action,reward,Response time,Next State:  (10, 19.140765783401285) 3 19.0 1392.96495117 (19.385636054792762, 11)
loss 360.3681335449219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 360.5272216796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 359.87506103515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 359.5203857421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 359.3901672363281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 359.7579345703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 361.1553955078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 359.12628173828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 363.23291015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 360.1289367675781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 361.469970703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 360.4639587402344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 360.87371826171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 359.63623046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 360.9809265136719
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 0 21.0 1200.39231205 (15.817158911312735, 9)
loss 360.4945983886719
Current State,action,reward,Response time,Next State:  (9, 15.817158911312735) 3 20.0 1251.06775133 (15.829956988360925, 10)
loss 359.0794372558594
Current State,action,reward,Response time,Next State:  (10, 15.829956988360925) 3 19.0 1217.34610485 (15.892373986997768, 11)
loss 359.9297790527344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 360.2083435058594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 359.552978515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 360.83612060546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 359.60882568359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 359.2034606933594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 360.9688720703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 361.1360778808594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 362.37890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 360.7109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 360.4016418457031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 361.5417175292969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 361.02337646484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 359.41461181640625
############ Running episode number: 324  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 359.8843078613281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 359.9366760253906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 360.14923095703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 358.64569091796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 360.5649108886719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 362.33160400390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 360.59307861328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 359.5206604003906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 363.1262512207031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 360.5155029296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 360.5319519042969
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 1 20.0 939.812260006 (10.768325938188134, 10)
loss 359.0395812988281
Current State,action,reward,Response time,Next State:  (10, 10.768325938188134) 3 19.0 948.856481751 (10.772009508959538, 11)
loss 360.58673095703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 359.228515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 360.0727844238281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 361.58026123046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 360.0881042480469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 359.9549560546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 359.4466247558594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 358.80267333984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 361.79742431640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 359.0982360839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 359.237060546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 360.4208068847656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 361.17822265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 359.400146484375
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 4 19.0 913.381595845 (10.30224719189987, 11)
loss 359.3387145996094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 360.93450927734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 359.5450134277344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 360.9814758300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 360.3345031738281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 359.58404541015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 359.5445556640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 359.2507019042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 360.18865966796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 360.1535339355469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 361.7892150878906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 360.7412109375
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 2 19.0 914.152581784 (10.390165524255663, 11)
loss 360.6243591308594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 360.114501953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 361.63873291015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 359.0750427246094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 360.6949157714844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 359.8647155761719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 360.4455261230469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 359.9714660644531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 360.1858825683594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 360.8689880371094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 359.7286376953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 361.16943359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 360.0813293457031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 360.8585205078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 358.88165283203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 360.667236328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 359.3463134765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 359.96923828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 361.7113952636719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 360.8078308105469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 361.3838195800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 361.2029113769531
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 4 19.0 1294.6873044 (17.669285735563751, 11)
loss 360.625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 360.37457275390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 358.8983459472656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 360.4075927734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 359.79302978515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 359.0632629394531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 360.20489501953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 361.4044494628906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 359.9892272949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 359.1725158691406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 359.0582580566406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 359.6138916015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 359.7933349609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 360.14483642578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 360.5195007324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 360.75701904296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 361.8367004394531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 360.427001953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 359.81512451171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 359.7262268066406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 361.7263488769531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 360.12603759765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 360.31707763671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 359.4398193359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 358.87982177734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 361.5165100097656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 359.56048583984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 361.00079345703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 360.2679138183594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 361.3892822265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 359.0390319824219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 360.94134521484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 359.5234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 360.33306884765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 359.7500305175781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 359.30010986328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 361.58477783203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 360.2242736816406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 359.7137451171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 361.34881591796875
############ Running episode number: 325  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 359.9981384277344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 359.71484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 360.5389709472656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 359.0885314941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 361.2874755859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 361.35699462890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 359.3238830566406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 360.38897705078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 360.5057373046875
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 1 20.0 945.729803224 (10.816918347608043, 10)
loss 361.6949462890625
Current State,action,reward,Response time,Next State:  (10, 10.816918347608043) 3 19.0 951.434021987 (10.819208572963639, 11)
loss 361.32403564453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 361.22161865234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 359.46807861328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 360.8647766113281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 361.4328918457031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 360.0484619140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 361.27801513671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 360.142822265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 360.1669006347656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 360.8821716308594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 360.039794921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 359.7959289550781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 359.2013244628906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 359.7349853515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 360.8769226074219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 361.6331787109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 359.7100830078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 359.86602783203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 359.0220031738281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 361.36920166015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 360.7907409667969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 360.66741943359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 359.2863464355469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 359.3914794921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 360.07745361328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 360.2398681640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 360.6551818847656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 359.3979797363281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 359.7728271484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 359.5711975097656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 359.5216979980469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 361.2275695800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 359.05401611328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 359.617431640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 359.8384094238281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 361.67156982421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 360.18121337890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 359.8862609863281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 359.85931396484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 359.865966796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 358.7574157714844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 360.392578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 359.8482360839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 359.45166015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 361.1496887207031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 359.8160400390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 361.16571044921875
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 1 20.0 1204.9600972 (16.466876895473597, 10)
loss 360.2693176269531
Current State,action,reward,Response time,Next State:  (10, 16.466876895473597) 3 19.0 1251.130943 (16.871606159345866, 11)
loss 359.4632568359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 360.1942443847656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 360.031005859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 359.4799499511719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 359.8626708984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 360.415771484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 359.630615234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 359.8352355957031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 361.2973327636719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 360.7962646484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 359.5964050292969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 360.3810729980469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 360.2627258300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 360.9737854003906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 359.85406494140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 359.32867431640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 360.7427978515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 359.0150146484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 360.22772216796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 361.8725891113281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 359.8275146484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 358.7783203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 362.501953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 360.90771484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 359.09747314453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 360.38946533203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 359.0796813964844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 360.6396789550781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 358.80865478515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 359.6960754394531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.8883361816406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 361.2043151855469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 359.93511962890625
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 2 19.0 1210.80462626 (16.11465619633363, 11)
loss 361.0128479003906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 359.38568115234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 361.4021301269531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 361.4217224121094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 361.7406311035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 358.8456726074219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 360.4618225097656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 359.4929504394531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 359.5244445800781
############ Running episode number: 326  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 360.2250671386719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 360.3961486816406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 360.1758728027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 359.2027587890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 359.4498596191406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 361.0937805175781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 359.3622741699219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 360.4490966796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 359.654541015625
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 2 19.0 945.729803224 (10.816918347608043, 11)
loss 360.08270263671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 359.5329895019531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 360.35546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 360.4510498046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 360.0992736816406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 360.1991271972656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 359.6660461425781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 359.68817138671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 360.33709716796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 360.15753173828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 362.1446228027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 360.0006103515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 359.34832763671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 358.6299743652344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 359.80926513671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 359.6974182128906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 359.3841247558594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 358.74041748046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 358.8054504394531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 360.06768798828125
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 0 21.0 910.69972028 (10.335411397720526, 9)
loss 359.6963195800781
Current State,action,reward,Response time,Next State:  (9, 10.335411397720526) 3 20.0 964.125321415 (10.305649118067803, 10)
loss 359.5652770996094
Current State,action,reward,Response time,Next State:  (10, 10.305649118067803) 3 19.0 924.314209939 (10.24826025489064, 11)
loss 361.2075500488281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 359.8724670410156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 359.3030700683594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 358.6931457519531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 360.6825256347656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 360.2279357910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 360.03485107421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 361.097900390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 358.9389343261719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 360.3128967285156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 361.2881774902344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 359.5905456542969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 360.4517822265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 360.7579040527344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 360.29931640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 361.2510986328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 359.3539733886719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 359.7828674316406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 359.6241455078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 360.4075012207031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 359.1890869140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 359.5492858886719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 359.0929870605469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 358.9374694824219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 359.1916198730469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 358.77093505859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 360.3305358886719
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 0 21.0 1238.24711194 (16.871606159345866, 9)
loss 359.58966064453125
Current State,action,reward,Response time,Next State:  (9, 16.871606159345866) 3 20.0 1306.26286107 (17.534967586021782, 10)
loss 360.6908264160156
Current State,action,reward,Response time,Next State:  (10, 17.534967586021782) 3 19.0 1307.78684385 (17.669285735563751, 11)
loss 361.3631896972656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 360.27935791015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 362.14703369140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 359.71185302734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 360.49725341796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 360.39129638671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 361.5570373535156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 360.4814147949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 360.55169677734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 359.6601867675781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 359.6877136230469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 358.8480224609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 361.7097473144531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 360.85028076171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 359.635986328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 360.2015686035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 359.8061218261719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 359.61322021484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 361.1155090332031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 360.3941345214844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 359.91619873046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 360.6271667480469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 360.6517639160156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 359.2371520996094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 360.92919921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 362.14447021484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 360.7940979003906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 359.7678527832031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 359.8896179199219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 361.8017578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 360.03759765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 359.3308410644531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 359.82080078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 358.87396240234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 361.549560546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 359.90887451171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 359.5724182128906
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 2 19.0 1257.77263112 (16.845818065953559, 11)
loss 359.67767333984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 359.4305419921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 361.5091857910156
############ Running episode number: 327  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 359.3301086425781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 359.154541015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 360.05767822265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 359.8710021972656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 361.1617736816406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 359.7071838378906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 362.2230529785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 360.3758850097656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 360.5323791503906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 360.3045959472656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 359.6898498535156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 360.7829284667969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 361.38458251953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 360.1018981933594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 360.2009582519531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 361.015380859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 360.0606384277344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 363.2901611328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 359.88079833984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 359.9002685546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 360.4891052246094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 360.75750732421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 360.4994201660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 360.5307312011719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 359.1714172363281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 360.778564453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 358.9103088378906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 361.336669921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 359.7159729003906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 359.0091247558594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 360.897705078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 361.2030334472656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 359.8568420410156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 359.9393615722656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 361.22454833984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 361.11944580078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 359.4650573730469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 361.6564636230469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 359.5932312011719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 360.4532775878906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 359.0840148925781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 362.06005859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 360.3243713378906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 359.7445373535156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 361.679931640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 360.4713439941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 359.42169189453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 359.5657043457031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 361.835693359375
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 0 21.0 992.681522335 (12.19918626616789, 9)
loss 359.91180419921875
Current State,action,reward,Response time,Next State:  (9, 12.19918626616789) 3 20.0 1061.68473805 (12.501496275411796, 10)
loss 361.00537109375
Current State,action,reward,Response time,Next State:  (10, 12.501496275411796) 3 19.0 1040.79092857 (13.168618569876575, 11)
loss 359.957275390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 360.63311767578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 360.53216552734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 358.8319091796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 359.9241027832031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 360.54449462890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 360.1261291503906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 361.204833984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 362.42425537109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 359.9793701171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 360.9388427734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 361.3000183105469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 360.994140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 360.383544921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 360.0234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 359.83843994140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 360.3785705566406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 359.0083923339844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 360.849609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 361.4386901855469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 358.5461120605469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 360.7821350097656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 361.45355224609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 360.87213134765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 360.1759338378906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 359.05328369140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 361.20269775390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 360.2022399902344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 359.7522888183594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 359.2113037109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 361.2834777832031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 359.09185791015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 360.39910888671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 359.8703918457031
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 2 19.0 1203.91462651 (15.829956988360925, 11)
loss 360.5570983886719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 359.8491516113281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 359.9657897949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 361.12237548828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 359.6315002441406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 362.4856262207031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 362.413330078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 362.2562561035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 359.7079772949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 360.2251281738281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 360.46533203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 360.4971008300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 360.8684997558594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 359.6048889160156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 359.6214294433594
############ Running episode number: 328  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 359.08453369140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 360.2419128417969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 362.61260986328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 361.96820068359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 359.7698669433594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 360.4967346191406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 362.4270935058594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 359.47406005859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 360.851806640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 360.6343078613281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 359.5761413574219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 360.3359069824219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 359.3743896484375
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 2 19.0 937.318160694 (10.644925616761762, 11)
loss 360.2182312011719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 359.4955139160156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 359.49078369140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 359.8315124511719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 359.75
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 359.7572326660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 359.4994201660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 359.6355895996094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 360.39166259765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 361.7724304199219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 359.629150390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 360.1653747558594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 362.2184143066406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 360.473876953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 359.29742431640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 360.425537109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 359.1015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 359.5042419433594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 361.5869140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 359.8056335449219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 361.3880310058594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 361.1587219238281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 358.6493225097656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 360.98065185546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 359.61962890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 360.77252197265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 362.70025634765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 359.9971923828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 359.4400634765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 360.89105224609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 361.2105712890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 361.8366394042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 360.7042541503906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 360.64788818359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 361.4959411621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 360.7255859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 360.66314697265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 359.42401123046875
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 4 19.0 1028.70793389 (13.168618569876575, 11)
loss 360.75152587890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 359.89935302734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 361.4577941894531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 359.75732421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 359.9588623046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 360.0845031738281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 360.5342102050781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 361.3887023925781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 360.70709228515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 359.8592529296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 358.6559753417969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 359.88720703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 359.6429748535156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 359.8031921386719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 360.4071044921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 361.37786865234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 360.3951721191406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 361.0555419921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 360.8436279296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 361.82855224609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 359.7807922363281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 360.0246276855469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 360.4648132324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 361.3568420410156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 360.7724304199219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 360.57342529296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 359.23193359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 360.2074279785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 360.02880859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 359.43121337890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 359.7478942871094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 359.29736328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 360.3240661621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 360.58074951171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 360.34246826171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 359.2010498046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 361.1725158691406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.77471923828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 360.2481384277344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 359.9840393066406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 359.20550537109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 359.4483337402344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 361.0294494628906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 361.5101318359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 358.69964599609375
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 0 21.0 1248.87152463 (16.836383524612351, 9)
loss 359.26641845703125
Current State,action,reward,Response time,Next State:  (9, 16.836383524612351) 3 20.0 1304.41912996 (16.845818065953559, 10)
loss 360.69024658203125
Current State,action,reward,Response time,Next State:  (10, 16.845818065953559) 3 19.0 1271.23153331 (17.052961248403161, 11)
loss 361.3988037109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 361.26397705078125
############ Running episode number: 329  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 360.5325622558594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 360.458984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 360.2843017578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 360.4631652832031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 359.3172302246094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 361.58502197265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 360.5469665527344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 358.8029479980469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 358.9618835449219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 360.40899658203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 360.72991943359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 362.7492370605469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 359.76513671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 359.4690246582031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 359.9078369140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 360.744384765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 360.3191223144531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 358.8271179199219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 360.7586975097656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 359.07281494140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 359.60089111328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 359.5645751953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 358.6507568359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 360.7098693847656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 359.2564697265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 360.1204528808594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 360.361083984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 359.6502990722656
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 2 19.0 911.223233653 (10.268274366284802, 11)
loss 360.82879638671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 359.4169616699219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 360.9120788574219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 360.348876953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 360.0158386230469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 359.6876220703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 359.9688720703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 359.8100891113281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 361.5365905761719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 359.2950134277344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 360.96978759765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 360.4107666015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 359.69384765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 359.6784973144531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 360.01251220703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 363.01165771484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 359.95745849609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 360.96697998046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 360.2397766113281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 359.01202392578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 361.3017272949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 359.687255859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 360.3250732421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 361.66375732421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 362.509521484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 360.4269104003906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 359.95819091796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 360.5477600097656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 362.283447265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 360.5335998535156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 360.08453369140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 360.4081726074219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 359.7523193359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 360.0302734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 360.7242736816406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 359.96978759765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 359.3322448730469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 360.0492858886719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 358.80755615234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 360.14703369140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 360.7011413574219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 359.46612548828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 359.7642517089844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 361.0697326660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 361.1556701660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 360.0470886230469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 360.3450927734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 360.0281066894531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 360.9767150878906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 358.85272216796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 360.8555908203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 361.0551452636719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 359.5861511230469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 359.8548889160156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 362.464599609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 360.0486755371094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 361.5439147949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 359.0989685058594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 358.8135681152344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 359.6153869628906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 361.3682861328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 360.5721130371094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 360.70538330078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 359.8309020996094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 360.1465759277344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 361.4954833984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 359.1771240234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 359.6702575683594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 359.13623046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 360.2649230957031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 363.5343933105469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 360.0800476074219
############ Running episode number: 330  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 359.9330749511719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 360.6839904785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 362.2372131347656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 361.84124755859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 362.0220642089844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 360.78997802734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 359.51904296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 360.769775390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 360.9306335449219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 361.4281311035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 360.0601501464844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 360.3226623535156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 359.14581298828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 359.2929992675781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 359.2808532714844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 359.4048767089844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 360.0333557128906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 359.1693115234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 360.0559997558594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 361.7742614746094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 361.54498291015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 359.00970458984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 361.9644775390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 359.0754089355469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 361.7572937011719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 360.2372131347656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 360.08306884765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 359.7378234863281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 360.0921936035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 359.610595703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 359.2957458496094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 359.91400146484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 360.5971984863281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 359.01397705078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 361.28173828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 358.8873596191406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 360.166259765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 361.64495849609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 360.1146240234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 360.8987731933594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 361.4827880859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 360.9791564941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 359.0434265136719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 359.75909423828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 361.0747985839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 360.3000793457031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 361.28155517578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 361.3572082519531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 363.1171569824219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 359.6006774902344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 362.188232421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 359.6475830078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 360.336181640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 358.9808349609375
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 2 19.0 1122.88439768 (14.677479537099185, 11)
loss 358.84112548828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 359.66876220703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 360.8095397949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 359.9910583496094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 360.44873046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 362.79669189453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 360.5671691894531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 361.40087890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 360.4289855957031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 359.04205322265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 360.2442626953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 359.95843505859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 359.7185363769531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 363.0452880859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 360.3782653808594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 358.8546142578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 359.6473388671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 358.8619079589844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 360.43011474609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 362.46856689453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 361.6001892089844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 360.6151123046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 360.9663391113281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 360.7483215332031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 361.1156921386719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 359.5538330078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 359.6358337402344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 361.3938293457031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 359.2386474609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 360.04046630859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 360.2835998535156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 362.3193359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 361.5292053222656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 359.26129150390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 360.07244873046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 359.5117492675781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 359.21771240234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 359.48150634765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 360.9480895996094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 361.1078186035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 359.0176696777344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 361.0904235839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 360.5718688964844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 359.5809326171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 359.0888671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 360.2913513183594
############ Running episode number: 331  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 359.3213195800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 359.3172912597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 359.7705078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 359.5876770019531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 360.6385803222656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 358.9689025878906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 360.31195068359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 358.8117980957031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 361.0408935546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 359.0491027832031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 359.5302429199219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 361.24993896484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 359.943359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 359.2978820800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 359.5873718261719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 360.22265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 359.7977294921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 359.2255859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 361.6514587402344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 361.87774658203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 358.9358215332031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 358.738037109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 360.17633056640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 359.8504943847656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 359.1859130859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 360.4920959472656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 360.1119384765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 361.1300964355469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 359.03277587890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 359.6148376464844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 360.02484130859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 360.70123291015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 359.1571044921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 360.5940856933594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 360.9775695800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 360.3954772949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 358.70147705078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 360.05108642578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 359.6504821777344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 360.4582824707031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 359.5722351074219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 361.4637756347656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 360.5755615234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 360.60400390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 360.5100402832031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 360.5323486328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 361.3064880371094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 360.0348205566406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 359.3144836425781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 359.96240234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 359.4124755859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 359.6925048828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 359.09405517578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 360.4847717285156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 361.15496826171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 360.3034362792969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 360.8373107910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 359.5137634277344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 360.5918884277344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 359.2001037597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 360.0346984863281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 360.95703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 360.40240478515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 359.0246276855469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 359.5838928222656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 362.0110168457031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 359.701171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 359.6634826660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 360.8379211425781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 359.9806823730469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 361.8394470214844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 359.6824951171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 359.0643005371094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 359.67913818359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 362.1451110839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 359.7427978515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 360.43865966796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 359.99322509765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 359.08892822265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 359.33563232421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 359.33154296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 359.86968994140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 360.41888427734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 359.49151611328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 360.75201416015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 361.1938781738281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 360.07269287109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.9471130371094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 359.0907287597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 359.33111572265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 360.39642333984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 361.2934265136719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 359.9327697753906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 360.1811828613281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 360.2561340332031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 359.2831726074219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 360.660400390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 360.1811828613281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 360.1703796386719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 360.3072814941406
############ Running episode number: 332  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.79644775390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 359.5124816894531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 359.58612060546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 360.40106201171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 359.6901550292969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 359.3009948730469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 360.36114501953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 360.3993835449219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 359.76153564453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 360.8046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 360.4046630859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 358.8641662597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 359.1517639160156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 359.09954833984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 360.0412292480469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 359.8136901855469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 360.614990234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 360.5154724121094
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 4 19.0 922.369964659 (10.448897752470936, 11)
loss 359.9647521972656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 358.87738037109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 362.354248046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 359.0959167480469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 359.71978759765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 359.87493896484375
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 1 20.0 917.051082408 (10.344006106602812, 10)
loss 362.4730529785156
Current State,action,reward,Response time,Next State:  (10, 10.344006106602812) 3 19.0 926.348821567 (10.319026962956018, 11)
loss 360.3946838378906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 360.1268310546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 360.06561279296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 360.0758056640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 359.0602111816406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 360.5440979003906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 359.6328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 359.9933166503906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 360.1158142089844
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 0 21.0 909.046654676 (10.236272697871373, 9)
loss 360.28643798828125
Current State,action,reward,Response time,Next State:  (9, 10.236272697871373) 3 20.0 958.935899728 (10.369891240151098, 10)
loss 361.2840881347656
Current State,action,reward,Response time,Next State:  (10, 10.369891240151098) 3 19.0 927.721874973 (10.316955310454549, 11)
loss 361.2364196777344
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 1 20.0 913.272125304 (10.333617326102203, 10)
loss 361.69256591796875
Current State,action,reward,Response time,Next State:  (10, 10.333617326102203) 3 19.0 925.797758139 (10.390165524255663, 11)
loss 359.9954528808594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 360.66522216796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 359.96783447265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 359.8489685058594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 360.7756042480469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 360.5950927734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 360.348388671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 359.5326232910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 360.4947814941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 360.0838623046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 358.8985900878906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 360.0784912109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 359.7534484863281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 360.40802001953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 360.77880859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 361.0232238769531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 361.2453918457031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 359.7734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 360.9384460449219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 358.676025390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 359.3067321777344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 360.82183837890625
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 1 20.0 1294.6873044 (17.669285735563751, 10)
loss 360.4571228027344
Current State,action,reward,Response time,Next State:  (10, 17.669285735563751) 3 19.0 1314.91162813 (17.944480812078613, 11)
loss 359.3548889160156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 359.62481689453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 359.2177734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 358.70245361328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 359.9702453613281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 361.1843566894531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 360.6701965332031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 359.2671813964844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 360.00732421875
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 2 19.0 1392.48057747 (19.223969507401588, 11)
loss 359.5802001953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 359.9512634277344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 360.8338928222656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 361.0986328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 358.8391418457031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 360.188232421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 360.1220397949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 359.9012145996094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 360.0709533691406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 360.0735778808594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 360.08795166015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 361.19537353515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 359.215087890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 360.5645446777344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 360.2054748535156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 359.5223083496094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 361.4155578613281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 361.9266052246094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 360.22100830078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 360.37841796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 362.2982482910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 360.76666259765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 359.23480224609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 362.5702819824219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 359.1850280761719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 360.3622741699219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 362.6253662109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 359.6943359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 359.1313781738281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 361.5108947753906
############ Running episode number: 333  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 361.87860107421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 361.18359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 359.058837890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 360.34722900390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 360.77154541015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 361.3854675292969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 359.4020690917969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 360.1643981933594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 360.2493591308594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 360.7651062011719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 360.36639404296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 359.85455322265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 359.62451171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 360.0555419921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 359.49261474609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 360.9563293457031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 359.32275390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 360.0934143066406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 360.1792297363281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 360.8522033691406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 359.81768798828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 359.8872985839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 359.2146911621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 361.25054931640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 359.8382873535156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 359.5303039550781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 359.75030517578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 360.17529296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 359.938232421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 360.373779296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 359.6894836425781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 358.7153015136719
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 4 19.0 909.642131904 (10.276491935146446, 11)
loss 360.4899597167969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 359.57122802734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 359.0135803222656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 359.9618835449219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 360.8853454589844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 359.2601013183594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 359.80596923828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 359.59783935546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 359.0221862792969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 361.1382751464844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 360.2960510253906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 358.90826416015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 360.9779052734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 358.56646728515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 360.5338439941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 360.05908203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 360.2145690917969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 359.3176574707031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 359.5390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 359.19915771484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 359.8435363769531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 360.37957763671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 359.0149841308594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 360.04193115234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 359.4461669921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 359.4136657714844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 359.81036376953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 361.275390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 359.7685546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 359.1529235839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 359.5051574707031
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 4 19.0 1339.64749699 (18.671267839956315, 11)
loss 359.1177062988281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 359.65570068359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 359.5245361328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 359.6711730957031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 358.70953369140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 360.0738830566406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 358.881591796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 360.6846618652344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 359.9739990234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 360.0466003417969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 358.8907470703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 359.49310302734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 359.4736022949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 359.5245666503906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 360.06243896484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 359.8589782714844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 359.6775207519531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 359.80743408203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 358.7619323730469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 358.92559814453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 358.8730773925781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 358.9334411621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 359.6558532714844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 359.3941345214844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.9892883300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.7837829589844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 360.4046936035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 359.19378662109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 359.62646484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 359.0013122558594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 359.30224609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 359.5284118652344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 359.85638427734375
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 0 21.0 1248.87152463 (16.836383524612351, 9)
loss 359.59613037109375
Current State,action,reward,Response time,Next State:  (9, 16.836383524612351) 3 20.0 1304.41912996 (16.845818065953559, 10)
loss 360.6985778808594
Current State,action,reward,Response time,Next State:  (10, 16.845818065953559) 3 19.0 1271.23153331 (17.052961248403161, 11)
loss 360.93719482421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 360.7871398925781
############ Running episode number: 334  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 359.12286376953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 358.9118347167969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 359.2394714355469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 358.7646484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 359.64697265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 359.1459045410156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 360.1777648925781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 359.2978820800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 361.16943359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 358.8448791503906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 359.8799743652344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 359.9136047363281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 360.3834228515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 361.6191711425781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 358.74163818359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 359.1759033203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 360.2409362792969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 360.3248596191406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 359.67791748046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 358.91473388671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 360.3725891113281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 359.7512512207031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 360.6422424316406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 359.55975341796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 360.104736328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 360.7127990722656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 359.65771484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 359.2308044433594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 359.0310363769531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 360.4303283691406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 359.1764221191406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 360.320556640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 358.8438415527344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 359.58343505859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 359.63037109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 359.0115661621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 360.1937561035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.919921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 358.96136474609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 359.7840881347656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 359.8785095214844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 359.33599853515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 358.6824951171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 361.4484558105469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 358.9581604003906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 359.7353210449219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 359.24774169921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 359.7359619140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 360.628173828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 360.30224609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 360.1750183105469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 359.9656066894531
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 4 19.0 1063.96010023 (13.649658108197247, 11)
loss 359.298583984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 359.97442626953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 361.0054016113281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 358.87103271484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 358.8959045410156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 359.4885559082031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 360.8175354003906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 359.6012268066406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 359.4881591796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 360.49114990234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 360.0231018066406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 359.1671142578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 359.52093505859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 358.9768981933594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 359.0511474609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 359.4082336425781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 359.0704650878906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 359.5361328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 359.8843078613281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 361.39178466796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 359.9697265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 361.2126770019531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 359.900146484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 359.5771789550781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 360.568603515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 360.2471008300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 359.29620361328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 359.3485412597656
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 1 20.0 1210.97093797 (15.828704162850809, 10)
loss 360.9785461425781
Current State,action,reward,Response time,Next State:  (10, 15.828704162850809) 3 19.0 1217.27964986 (15.550833128512703, 11)
loss 359.4747009277344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 360.14825439453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 360.9685363769531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 358.71563720703125
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 2 19.0 1203.91462651 (15.829956988360925, 11)
loss 360.7728271484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 359.95562744140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 359.31170654296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 360.21856689453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 360.82879638671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 358.9112548828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 359.5829162597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 359.4785461425781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 360.91571044921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 359.55120849609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 361.9801025390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 360.14276123046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 359.9200439453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 359.45416259765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 359.3836669921875
############ Running episode number: 335  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.52191162109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 359.6564636230469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 360.0314636230469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 359.3811340332031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 359.4365234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 358.7647399902344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 358.7779541015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 358.8623046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 360.24462890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 358.6310119628906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 359.3360595703125
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 4 19.0 939.812260006 (10.768325938188134, 11)
loss 359.6379089355469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 360.1652526855469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 359.359619140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 358.9009704589844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 358.7201843261719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 360.4064025878906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 359.4412841796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 358.8631591796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 359.1053771972656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 359.61724853515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 359.1016845703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 360.08447265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 359.9141845703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 359.3668518066406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 359.0372009277344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 359.3709716796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 358.9937438964844
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 4 19.0 911.223233653 (10.268274366284802, 11)
loss 360.04388427734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 359.67779541015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 359.64447021484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 359.1427917480469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 360.02374267578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 359.9062194824219
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 4 19.0 909.046654676 (10.236272697871373, 11)
loss 360.0807800292969
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 2 19.0 909.008683798 (10.369891240151098, 11)
loss 358.8991394042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 359.06683349609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.69775390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 360.23712158203125
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 2 19.0 917.140709305 (10.425974763084863, 11)
loss 359.58941650390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 359.68292236328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 359.59014892578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 359.6712646484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 359.0743713378906
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 0 21.0 937.284736847 (10.924797168745895, 9)
loss 361.13140869140625
Current State,action,reward,Response time,Next State:  (9, 10.924797168745895) 3 20.0 994.97675791 (11.039747673816453, 10)
loss 360.3893127441406
Current State,action,reward,Response time,Next State:  (10, 11.039747673816453) 3 19.0 963.253801267 (11.271571944085663, 11)
loss 359.06298828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 362.10223388671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 362.9715576171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 360.2198486328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 359.2025146484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 359.3778991699219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 358.86737060546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 360.97369384765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 359.58203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 359.51190185546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 360.30224609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 361.0763854980469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 359.9745178222656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 359.7879943847656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 360.5981140136719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 359.7621154785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 358.65093994140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 359.5723571777344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 360.20916748046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 359.06610107421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 359.62786865234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 360.2667541503906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 361.5272521972656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 360.60284423828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 359.0335388183594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 360.24139404296875
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 2 19.0 1385.62570908 (19.08360399753829, 11)
loss 360.7876281738281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 361.5356750488281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 359.5503845214844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 361.2631530761719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 359.5738525390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 360.14959716796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 359.69598388671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 359.6741027832031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 360.3514709472656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 360.7467956542969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 360.44793701171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 358.9949645996094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 359.17987060546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 359.25738525390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 360.4584655761719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 360.53265380859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 359.4485778808594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 359.8266906738281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 360.1220703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 359.3146667480469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 361.26336669921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 359.2224426269531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 359.2419738769531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 360.87396240234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 359.6387939453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 359.3044738769531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 359.0738220214844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 359.9429626464844
############ Running episode number: 336  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 360.0252380371094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 359.82440185546875
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 2 19.0 990.920419923 (11.61852219546234, 11)
loss 359.5863952636719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 359.62469482421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 358.73291015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 362.08856201171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 360.8270263671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 359.3417663574219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 360.1796569824219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 358.73370361328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 360.5245666503906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 360.4627990722656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 359.61871337890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.85650634765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 359.7164611816406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 361.75042724609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 360.0185546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 360.3207702636719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 359.6836853027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 360.7934265136719
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 1 20.0 919.412094444 (10.44185150623065, 10)
loss 359.186767578125
Current State,action,reward,Response time,Next State:  (10, 10.44185150623065) 3 19.0 931.538941947 (10.370942817486826, 11)
loss 361.7899475097656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 360.5838317871094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 359.95391845703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 360.7982177734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 360.37530517578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 359.6463317871094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 359.5242004394531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 358.6576232910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 360.559326171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 359.86761474609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 360.8701477050781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 359.3647766113281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 360.33709716796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 359.3304748535156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 359.6995544433594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 359.99639892578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.8935852050781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 359.06982421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 359.7391357421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 361.0053405761719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 360.222900390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 359.88397216796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 358.919189453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 359.7841491699219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 358.85223388671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 359.1804504394531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 360.9176940917969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 359.4459228515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 360.00323486328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 360.02606201171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 360.1422424316406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 359.43597412109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 361.5451354980469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 359.2051086425781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 359.3977355957031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 362.8592224121094
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 1 20.0 1204.9600972 (16.466876895473597, 10)
loss 358.7541198730469
Current State,action,reward,Response time,Next State:  (10, 16.466876895473597) 3 19.0 1251.130943 (16.871606159345866, 11)
loss 359.4175720214844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 360.7532653808594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 358.9410400390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 360.2796936035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 359.5008239746094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 359.7655944824219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 358.8200988769531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 359.7414245605469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 359.157958984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 359.2310791015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 359.34515380859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 359.7765808105469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 359.01593017578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 359.3773498535156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 359.2392883300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 359.0741882324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 361.7264099121094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 359.27459716796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 359.4168395996094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 360.2394104003906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 359.6474304199219
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 2 19.0 1226.10492247 (15.950694610794756, 11)
loss 362.7447814941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 359.77801513671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 359.2267761230469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 358.75250244140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 359.52801513671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 358.9163513183594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 359.8477478027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 358.82464599609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 359.0989685058594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 360.8617248535156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 359.2983703613281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 359.5192565917969
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 4 19.0 1210.80462626 (16.11465619633363, 11)
loss 359.3469543457031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 359.6335754394531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 359.5993957519531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 359.27911376953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 360.0006408691406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 359.5018615722656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 359.13592529296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 359.3699645996094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 358.7171630859375
############ Running episode number: 337  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 359.750732421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 359.6389465332031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 359.0496826171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 361.8328552246094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 359.5460510253906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 360.8398132324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 360.3959045410156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 359.33038330078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 361.20294189453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 359.00665283203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 360.51824951171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 359.2835693359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 359.66021728515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 359.2425231933594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 359.13555908203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 359.2605285644531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 360.8773498535156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 360.66790771484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 359.7106628417969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 359.6102600097656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 359.2024230957031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 359.5346984863281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 359.7317810058594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 360.494873046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 359.1841125488281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 359.1077575683594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 360.17279052734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 360.28533935546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 359.6847229003906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 358.88037109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 360.99609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 359.0932922363281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 360.03009033203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 360.9632873535156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 359.2109069824219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 358.83074951171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 358.992919921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 360.87261962890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 358.7053527832031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 359.8125305175781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 359.2904052734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 359.46734619140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 359.17803955078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 358.7756042480469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 359.5271911621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 359.1013488769531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 360.04254150390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 359.2110900878906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 359.6850280761719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 359.8721008300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 360.4694519042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 358.89263916015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 359.9806213378906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 359.5940856933594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 359.10009765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 360.1783447265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 359.83819580078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 359.90643310546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 358.9931335449219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 358.6565246582031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 359.313232421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 358.7354431152344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 359.8323669433594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 358.9339599609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 358.7925109863281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 361.2593078613281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 360.76800537109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 359.3167724609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 360.13385009765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 360.36767578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 358.8453674316406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 361.5559997558594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 358.74041748046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 359.1474609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 359.95989990234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 358.99005126953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 358.9725646972656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 361.6629638671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 359.2211608886719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 359.38006591796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 358.67169189453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 358.7068176269531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 360.0469970703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 359.1005554199219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 360.255859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 359.1219787597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 359.3624572753906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 360.9388427734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 360.2371826171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 359.04193115234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 358.88385009765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 360.91986083984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 360.36773681640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 358.78131103515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 359.2333984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 358.9954528808594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 359.2026062011719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 359.4677734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 358.8765869140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 359.44287109375
############ Running episode number: 338  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 359.6787109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 359.48260498046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 360.5226745605469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 359.37799072265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 360.28240966796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 358.9898986816406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 360.7486877441406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 360.25494384765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 359.89459228515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 359.2891540527344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 358.9132385253906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 360.2675476074219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 359.0142822265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 359.7211608886719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 360.0484924316406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 359.6405334472656
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 0 21.0 925.738299342 (10.553846649940214, 9)
loss 359.0848388671875
Current State,action,reward,Response time,Next State:  (9, 10.553846649940214) 3 20.0 975.559328891 (10.489125480251131, 10)
loss 359.40673828125
Current State,action,reward,Response time,Next State:  (10, 10.489125480251131) 3 19.0 934.046546974 (10.448897752470936, 11)
loss 359.7787170410156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 359.6382751464844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 359.1626892089844
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 2 19.0 919.871906942 (10.370942817486826, 11)
loss 359.5585632324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 359.6615295410156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 359.1644592285156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 359.3621826171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 359.63763427734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 359.6845703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 359.0323791503906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 360.442138671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 358.89208984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 361.12652587890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 359.7012634277344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 359.34820556640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 358.95355224609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 359.2693176269531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 359.2575378417969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 359.694091796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.5326232910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 358.88055419921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 359.4037780761719
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 4 19.0 919.032945938 (10.546025383098053, 11)
loss 359.9759826660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 360.4181213378906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 359.778076171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 359.8080139160156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 359.4176940917969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 359.2557678222656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 359.2291564941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 359.96514892578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 359.3193664550781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 359.259521484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 358.7376708984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 358.87689208984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 359.3037414550781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 359.78668212890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 360.5804748535156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 359.30438232421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 358.8541259765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 359.34368896484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 360.7515563964844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 359.66693115234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 359.2068176269531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 359.7933349609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 359.5936279296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 360.5781555175781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 359.8233947753906
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 0 21.0 1373.60319427 (19.286321916040979, 9)
loss 359.6749572753906
Current State,action,reward,Response time,Next State:  (9, 19.286321916040979) 3 20.0 1432.66131431 (19.340464848017284, 10)
loss 358.8755798339844
Current State,action,reward,Response time,Next State:  (10, 19.340464848017284) 3 19.0 1403.55780672 (19.213467265587269, 11)
loss 359.10491943359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 359.1382141113281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 360.4783020019531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 359.9323425292969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 360.41119384765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 359.0038757324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 359.6064147949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 359.511962890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 360.9515380859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 358.7695007324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 360.46588134765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 360.1044616699219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 360.52081298828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 360.8164367675781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 361.279541015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 359.36676025390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 358.92828369140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 359.5378112792969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.8440246582031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 360.71380615234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 359.9365234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 360.5777893066406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 358.7974853515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 359.7502136230469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 360.8664855957031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 360.1484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 361.4331970214844
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 0 21.0 1225.69057988 (16.295120821876548, 9)
loss 359.29693603515625
Current State,action,reward,Response time,Next State:  (9, 16.295120821876548) 3 20.0 1276.0866986 (16.667936385136993, 10)
loss 360.35235595703125
Current State,action,reward,Response time,Next State:  (10, 16.667936385136993) 3 19.0 1261.79596106 (16.836383524612351, 11)
loss 360.7873840332031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 359.1919860839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 361.45477294921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 360.64434814453125
############ Running episode number: 339  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 359.9608459472656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 359.99993896484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 359.49493408203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 359.7236022949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 360.3917236328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 362.0580749511719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 359.6428527832031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 360.28143310546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 358.96807861328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 360.6145935058594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 359.5078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 360.4309997558594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 359.9785461425781
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 0 21.0 937.318160694 (10.644925616761762, 9)
loss 359.97698974609375
Current State,action,reward,Response time,Next State:  (9, 10.644925616761762) 3 20.0 980.32686333 (10.58735855349979, 10)
loss 359.8933410644531
Current State,action,reward,Response time,Next State:  (10, 10.58735855349979) 3 19.0 939.257231149 (10.552868829802469, 11)
loss 359.196533203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 361.0535583496094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 359.00238037109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 359.5438537597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 359.5535583496094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 360.7959899902344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 360.73797607421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 360.38641357421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 359.53851318359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 360.43572998046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 359.6242980957031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 359.0175476074219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 359.0859069824219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 359.80999755859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 360.1013488769531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 360.6240234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 359.8016052246094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 360.453369140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 359.7087707519531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 359.053466796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 359.6754150390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 359.8836975097656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 360.3077697753906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 359.5699157714844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 359.3072814941406
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 4 19.0 919.032945938 (10.546025383098053, 11)
loss 358.92138671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 359.63482666015625
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 4 19.0 931.154858096 (10.624473674922116, 11)
loss 359.82049560546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 360.5912780761719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 360.4151306152344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 360.2771301269531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 360.7754211425781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 360.8359680175781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 358.907470703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 359.0898132324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 359.3590393066406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 359.7519226074219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 359.7486572265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 361.6980285644531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 360.08489990234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 360.8457336425781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 359.9212951660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 359.9320068359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 359.1090087890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 360.2575988769531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 360.2196960449219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 361.3664855957031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 361.0543518066406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 360.4227600097656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 361.06695556640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 360.82708740234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 359.554443359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 359.6470947265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 360.2364196777344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 359.4573974609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 359.9316711425781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 359.1932067871094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 361.6572570800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 358.9933166503906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 359.38177490234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 359.3532409667969
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 0 21.0 1310.13203606 (17.229782241685768, 9)
loss 361.51324462890625
Current State,action,reward,Response time,Next State:  (9, 17.229782241685768) 3 20.0 1325.01161138 (16.84211602880065, 10)
loss 360.0854797363281
Current State,action,reward,Response time,Next State:  (10, 16.84211602880065) 3 19.0 1271.03516211 (16.237094554670044, 11)
loss 360.41436767578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 359.9769287109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 359.4236145019531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 361.88812255859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 359.2477111816406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 360.5545349121094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 359.38671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.91973876953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 359.0660095214844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 359.5365295410156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 360.209716796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 359.78759765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 358.89532470703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 360.61712646484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 358.9803161621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 358.7445983886719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 360.8311462402344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 359.40545654296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 360.2628173828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 360.09576416015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 359.8414001464844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 360.2184753417969
############ Running episode number: 340  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 361.17230224609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 360.08953857421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 360.28387451171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 359.18902587890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 360.11956787109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 359.15301513671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 359.3855285644531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 360.35223388671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 360.1922607421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 359.7978210449219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 361.1592712402344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 359.4526672363281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 360.6681213378906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 359.7672119140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 360.1092834472656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 359.4800109863281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 360.11968994140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 359.3824768066406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 359.696044921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 358.6983337402344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 358.64398193359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 359.2256164550781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 360.9205627441406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 361.0570068359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 361.4162292480469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 359.1925354003906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 359.6866455078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 359.46661376953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 360.608642578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 359.2843322753906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 359.4803161621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 360.5358581542969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 359.44384765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 359.7586364746094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 360.89190673828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 360.46771240234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 359.85821533203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 360.6456604003906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 359.01031494140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 359.0809631347656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 359.60284423828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 360.5137634277344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 358.9662780761719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 358.95391845703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 359.12774658203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 359.7811584472656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 361.94427490234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 361.2772521972656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 360.07293701171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 359.16693115234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 360.6361083984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 361.4383544921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 359.6371765136719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 359.0449523925781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 359.919677734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 359.7618103027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 360.1239318847656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 360.10174560546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 361.3141784667969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 359.6798095703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 359.35888671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 359.2742614746094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 359.16845703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 359.92962646484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 361.0119323730469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 361.1740417480469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 360.8880920410156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 360.2348327636719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 360.98974609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 360.13775634765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 359.87847900390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 359.4230041503906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 359.1949157714844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 360.4422302246094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 359.5696716308594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 359.82037353515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 361.0514831542969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 360.1575622558594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 360.9848327636719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 359.98663330078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 358.71124267578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 358.8167419433594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 360.0909423828125
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 0 21.0 1184.33851965 (15.750501603468638, 9)
loss 361.702880859375
Current State,action,reward,Response time,Next State:  (9, 15.750501603468638) 3 20.0 1247.57857022 (15.817158911312735, 10)
loss 359.42620849609375
Current State,action,reward,Response time,Next State:  (10, 15.817158911312735) 3 19.0 1216.66724247 (15.829956988360925, 11)
loss 359.2924499511719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 359.68341064453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 360.17535400390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.7110900878906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 359.1131896972656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 359.65704345703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 359.474365234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 361.2013854980469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 361.3912353515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 360.5433654785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 359.9940185546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 360.90667724609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 358.7653503417969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 359.2580871582031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 360.30999755859375
############ Running episode number: 341  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 359.96832275390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 359.83917236328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 359.0599670410156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 360.3031005859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 361.5194396972656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 362.250732421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 360.4077453613281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 360.2611999511719
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 2 19.0 949.137050055 (10.931193889570471, 11)
loss 360.6046447753906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 359.5776672363281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 360.6518859863281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 359.1518859863281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 360.049560546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 360.49249267578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 359.17529296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 359.5985412597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 358.7809143066406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 360.2017517089844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 360.63427734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 361.17816162109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 360.48822021484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 361.1773681640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 359.44219970703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 359.4931945800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 359.3562927246094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 359.834716796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 360.1311340332031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 360.1921691894531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 360.8909606933594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 359.57855224609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 359.1651611328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 359.7272644042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 358.8865051269531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 359.2569274902344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 362.0956115722656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 360.1065368652344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 359.8097229003906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 359.95166015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 361.28118896484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 360.42095947265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 360.4123840332031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 359.9399719238281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 359.7779846191406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 359.5964050292969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 360.18212890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 359.8897705078125
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 2 19.0 951.466016946 (11.271571944085663, 11)
loss 360.0719909667969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 360.6099548339844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 359.9807434082031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 359.8426208496094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 360.4880676269531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 359.9461364746094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 359.9801330566406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 360.32806396484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 359.57415771484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 360.96832275390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 359.27166748046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 361.61444091796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 359.6325988769531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 359.1201477050781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 359.74591064453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 360.3910217285156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 359.7097473144531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 359.7978820800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 360.0693054199219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 359.56927490234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 359.5443420410156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 359.3829345703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 360.0844421386719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 360.0545349121094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 358.9480285644531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 359.9954833984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 361.0368957519531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 358.8518371582031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 359.2331848144531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 360.1726379394531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 359.0874938964844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 359.73944091796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 361.7320251464844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 360.3953552246094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 360.5677185058594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 360.49713134765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 359.16748046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 360.26043701171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 360.1585998535156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 359.0088806152344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 360.505859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 359.4248046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 360.39056396484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 360.98760986328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 358.98870849609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 360.6883850097656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 359.18255615234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 359.9144287109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 359.6836853027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 360.1857604980469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 358.81707763671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 359.2244567871094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 359.65093994140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 359.5970458984375
############ Running episode number: 342  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 359.9791259765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 362.0711364746094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 359.7884826660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 359.1946716308594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 360.573974609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 359.212890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 360.7163391113281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 360.753173828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 359.26629638671875
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 2 19.0 945.729803224 (10.816918347608043, 11)
loss 360.5580139160156
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 2 19.0 939.691239608 (10.819208572963639, 11)
loss 359.0236511230469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 359.1769104003906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 361.0008239746094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 359.6427307128906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 361.5601501464844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 359.1588439941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 358.9667053222656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 360.0636901855469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 360.091552734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 358.66009521484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 359.91162109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 359.8963317871094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 360.7371826171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 359.9495544433594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 361.212158203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 359.6377868652344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 359.81170654296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 361.6584777832031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 360.4656677246094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 360.5836181640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 361.5392150878906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 359.86688232421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 360.38555908203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 361.1178894042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 359.2580871582031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 359.0351257324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 359.06353759765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 360.01568603515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 360.19384765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 360.0782470703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 359.89593505859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 359.1744689941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 360.8884582519531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 358.88897705078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 359.4681701660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 360.1999816894531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 361.2363586425781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 360.1361999511719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 359.2904968261719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 358.7581787109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 360.54669189453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 359.6974792480469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 359.8487854003906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 361.7334289550781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 360.3594970703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 360.7648010253906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 360.1169738769531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 359.2964172363281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 359.635498046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 360.4869384765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 359.1806640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 359.9171447753906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 358.85919189453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 362.32977294921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 358.7391662597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 359.8626403808594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 359.8583679199219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 359.745849609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 358.89129638671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 360.6394348144531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 361.4503173828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 361.95880126953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 360.0197448730469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 359.4694519042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 359.14385986328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 359.0027160644531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 360.11358642578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 360.161376953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 359.8598937988281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 359.2101135253906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 359.8014221191406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 360.0052795410156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 361.68585205078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 359.9714050292969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 358.8978576660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 359.0772399902344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 359.3983459472656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 359.64227294921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 360.0889587402344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 361.7849426269531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 359.6570129394531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 360.4836120605469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 359.5572509765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 360.53448486328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 361.0582580566406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 360.0928039550781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 359.40673828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 359.2283630371094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 360.8646240234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 359.1023254394531
############ Running episode number: 343  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 360.8425598144531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 360.0367431640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 359.2142639160156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 359.2760314941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 360.2417297363281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 359.3426208496094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 361.6707763671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 359.4111022949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 361.32061767578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 359.0037536621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 360.45916748046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 358.86590576171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 359.8028564453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 360.3706359863281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 360.7878723144531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 358.9302673339844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 359.74359130859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 359.77105712890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 361.6668395996094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 359.565185546875
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 4 19.0 919.412094444 (10.44185150623065, 11)
loss 359.2190246582031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 359.171630859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 360.4583740234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 358.69537353515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 360.88824462890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 359.5184631347656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 358.8072814941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 360.0793151855469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 359.0277404785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 362.33685302734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 359.63250732421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 359.1398620605469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 360.0924987792969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 361.0321960449219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 359.1864013671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 361.5736999511719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 359.88397216796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 359.9720764160156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 358.9472351074219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 360.2594299316406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 359.0646057128906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 359.439453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 358.8594665527344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 360.3929138183594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 359.3182373046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 359.7466735839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 358.9006652832031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 359.6712951660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 360.9184875488281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 359.4187927246094
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 1 20.0 1012.73322757 (12.501496275411796, 10)
loss 360.5828857421875
Current State,action,reward,Response time,Next State:  (10, 12.501496275411796) 3 19.0 1040.79092857 (13.168618569876575, 11)
loss 359.3985290527344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 360.74554443359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 360.4551086425781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 359.57635498046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 361.67822265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 359.95135498046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 359.9093322753906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 359.8459777832031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 359.4099426269531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 359.556396484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 359.30810546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 359.4486999511719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 359.1324157714844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 359.037109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 358.900390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 360.9900817871094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 358.9867248535156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 359.9651184082031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 359.35504150390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 360.6537780761719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 359.76861572265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 358.8632507324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 359.05291748046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 359.6261291503906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 359.3612976074219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 359.6275329589844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 360.57305908203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 359.63580322265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 359.1697082519531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 358.8280944824219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 359.8970031738281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 361.6646423339844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 358.8414611816406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 358.8636779785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 359.106201171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 359.00714111328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 360.5249328613281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 360.41552734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 360.2430419921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 360.2068786621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 359.72540283203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 358.9705810546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 360.87432861328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 360.1234130859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 360.9316711425781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 360.42767333984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 360.71405029296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 358.7696228027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 360.7442321777344
############ Running episode number: 344  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 361.29345703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 359.7217102050781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 359.7807922363281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 359.8066711425781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 360.0378112792969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 359.0315246582031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 360.7739562988281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 360.3741149902344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 359.0805969238281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 359.377685546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 360.10546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 363.0343017578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 359.7764587402344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.78485107421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 359.99932861328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 360.2507629394531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 359.5745849609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 362.3147888183594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 358.757568359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 359.50201416015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 360.013916015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 360.397705078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 360.0453796386719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 361.4951477050781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 359.7236328125
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 1 20.0 914.701547126 (10.319026962956018, 10)
loss 359.49267578125
Current State,action,reward,Response time,Next State:  (10, 10.319026962956018) 3 19.0 925.023825574 (10.30224719189987, 11)
loss 360.5343017578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 358.9259948730469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 359.9638366699219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 360.166015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 360.4189147949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 359.2784423828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 360.6925354003906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 359.3310852050781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 359.6324768066406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 359.3799133300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 359.5099182128906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 360.448974609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 359.9530944824219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 359.57000732421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 359.55316162109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 360.8878173828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 360.2699890136719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 359.8312072753906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 359.23809814453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 359.4380798339844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 359.885986328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 360.2233581542969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 358.62579345703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 359.85302734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 359.3459777832031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 359.0306396484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 360.4857482910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 358.8388671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 359.3032531738281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 359.5210266113281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 360.134521484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 359.4737548828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 360.2369384765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 359.3997802734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 359.7163391113281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 361.7587585449219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 358.54534912109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 360.1575622558594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 359.7970886230469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 361.3605041503906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 359.1961364746094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 359.8585205078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 359.6304931640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 361.106689453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 358.9541931152344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 359.2813720703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 360.0569763183594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 361.1059875488281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 359.272705078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 359.99462890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 359.6876220703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 359.5134582519531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.7420654296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 361.33709716796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 359.4552307128906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 360.9249572753906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 361.26214599609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 359.43072509765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 360.6701965332031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.96417236328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 359.4935607910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 360.5140075683594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.51739501953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 359.98291015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 359.41064453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 361.06591796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 361.15447998046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 359.44207763671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 359.20513916015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 359.3077697753906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 361.1441650390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 359.03924560546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 359.2975158691406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 359.5585632324219
############ Running episode number: 345  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.9702453613281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 359.2748107910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 360.28533935546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 359.2536926269531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 359.2810363769531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 359.61181640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 359.7086181640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 359.3551940917969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 358.9930725097656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 360.55889892578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 359.9262390136719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 359.59033203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 358.890869140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 360.5321044921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 359.5891418457031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 359.41448974609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 361.29449462890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 358.9737243652344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 360.0303955078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 359.39703369140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 360.014404296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 360.4978942871094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 360.18511962890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 359.1758117675781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 359.5486145019531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 360.16748046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 359.3434143066406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 361.4951477050781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 358.9481201171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 360.6824951171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 360.4554443359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 359.2881774902344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 359.7801208496094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 360.1199951171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 359.4261169433594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 360.1576232910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 359.72003173828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 359.34014892578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 360.64215087890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 359.3383483886719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 359.73785400390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 358.8896484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 359.3699951171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 360.3421325683594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 359.075439453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 358.9947204589844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 358.82867431640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 359.6476135253906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 359.1686096191406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 359.1002197265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 359.1803894042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 361.190673828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 359.78204345703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 358.9096374511719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 359.1695251464844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 359.51336669921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 359.8555603027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 360.4002685546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 358.8999328613281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 359.1427917480469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 359.58135986328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 359.84735107421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 359.1030578613281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 359.23870849609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 358.759765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 359.399658203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 360.216796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 359.92181396484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 360.50262451171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 358.8912048339844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 359.54388427734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 360.5506896972656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 359.23016357421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 359.2356872558594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 359.5989990234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 359.04876708984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 360.6431884765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 358.81134033203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 360.8495178222656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 359.88482666015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 359.7433166503906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 360.0303039550781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 359.4074401855469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 361.37921142578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 358.9496154785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.8691101074219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 358.9466552734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 360.21600341796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 360.5357360839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 362.3471374511719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 360.03009033203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 359.3496398925781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 360.36004638671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 360.10205078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 358.8691101074219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 358.99566650390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 360.1374206542969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 359.30511474609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 361.7471008300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 360.1540222167969
############ Running episode number: 346  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.9517517089844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 359.7298889160156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 360.0762939453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 359.6048278808594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 360.9458312988281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 359.9945068359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 362.8122863769531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 358.9765930175781
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 4 19.0 949.137050055 (10.931193889570471, 11)
loss 362.38916015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 359.9773254394531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 358.74224853515625
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 4 19.0 939.812260006 (10.768325938188134, 11)
loss 358.71258544921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 360.0780334472656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 360.54315185546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 359.79595947265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 359.64385986328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 359.1131896972656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 359.7688903808594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 360.0569763183594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 360.2570495605469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 360.093994140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 359.85333251953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 359.2204895019531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 359.4327392578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 359.3257141113281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 358.9576416015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 360.3003234863281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 358.59283447265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 358.85009765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 359.9490051269531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 358.78350830078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 358.9356689453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 359.16546630859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 359.1857604980469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 360.2279968261719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 359.0462646484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 358.9837646484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.8049011230469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 359.7088928222656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 359.03619384765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 360.5733337402344
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 0 21.0 925.376677007 (10.655373370049301, 9)
loss 359.513916015625
Current State,action,reward,Response time,Next State:  (9, 10.655373370049301) 3 20.0 980.873751654 (10.624473674922116, 10)
loss 359.5710754394531
Current State,action,reward,Response time,Next State:  (10, 10.624473674922116) 3 19.0 941.225969064 (10.771376986314287, 11)
loss 359.16961669921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 359.1584777832031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 359.616455078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 359.3738098144531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 360.0674743652344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 359.97857666015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 360.1445007324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 359.3396911621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 359.7270202636719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 361.55328369140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 360.8846435546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 358.8837890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 359.4388427734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 359.3241271972656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 359.52789306640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 359.17376708984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 359.865478515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 359.0897521972656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 359.23162841796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 359.54437255859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 359.30535888671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 359.50250244140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 360.5067443847656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 359.8683776855469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 362.9178161621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 359.8174133300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 359.8568420410156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 358.5933532714844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 359.7960205078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 359.8665771484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 358.9403991699219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 358.9840087890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 360.12628173828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 358.942138671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 358.8496398925781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.99456787109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 358.8919677734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 358.94189453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 360.2893371582031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 359.57269287109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 358.9095458984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 358.906982421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 360.00836181640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 359.8219299316406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 359.3348083496094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 360.57684326171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 359.825439453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 359.4066162109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 358.9754333496094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 360.2008056640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 358.767333984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 358.876953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 358.8307800292969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 359.6573791503906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 360.5439453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 360.7369079589844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 359.8514404296875
############ Running episode number: 347  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 359.74151611328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 359.5374755859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 359.1619873046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 359.0550231933594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 358.9564514160156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 358.81268310546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 359.2067565917969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 361.03997802734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 358.7074279785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 359.9220275878906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 360.296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 359.1838073730469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 358.8027648925781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 360.6619567871094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 359.646240234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 360.8171691894531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 359.66253662109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 359.7630310058594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 360.65081787109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 359.2133483886719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 359.11199951171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 360.1581726074219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 359.0831298828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 359.3746643066406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 360.74908447265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 359.6214294433594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 359.10198974609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 359.6558837890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 358.9223937988281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 359.2670593261719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 360.3138732910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 360.510498046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 360.8085632324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 359.9362487792969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 360.93701171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 359.0360412597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 360.49859619140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 359.5230712890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 359.19366455078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 359.9891357421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 359.7054748535156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 359.4455871582031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 361.7867126464844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 359.08538818359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 359.40399169921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 359.65203857421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 359.3572692871094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 360.25677490234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 359.4013977050781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 360.09576416015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 360.4696044921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 359.26336669921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 358.66650390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 358.9092102050781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 358.9235534667969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 359.35235595703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 360.2408752441406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 359.12542724609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 360.0716552734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 359.5644226074219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 360.9277648925781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 361.0531311035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 359.0372314453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 360.8468322753906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 360.1549377441406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 360.1617126464844
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 4 19.0 1387.23260634 (19.340464848017284, 11)
loss 361.107177734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 359.0599060058594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 359.3072814941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 358.8104248046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 360.0868225097656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 360.1051025390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 360.71392822265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 359.6441345214844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 359.3612976074219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 358.92926025390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 358.6570739746094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 360.4142150878906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.7027893066406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 359.0481262207031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 360.08038330078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 360.03558349609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 359.7728271484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 358.99835205078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 359.31890869140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.76251220703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 360.5845947265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 359.6735534667969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 360.71868896484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 358.7754211425781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 359.24493408203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 361.0072326660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 359.16156005859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 360.5440368652344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 359.3159484863281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 358.7757568359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 359.109130859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 359.95074462890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 358.95318603515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 359.6811218261719
############ Running episode number: 348  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 359.7341003417969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 358.959228515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 360.87005615234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 361.2130432128906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 359.01904296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 359.4299621582031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 359.2815856933594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 358.733154296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 359.26898193359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 362.2018737792969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 359.166259765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 359.1739196777344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 360.88189697265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.9793701171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 359.7484436035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 359.5774841308594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 359.46343994140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 359.5251770019531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 359.7585754394531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 359.9261169433594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 360.098876953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 359.6614990234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 359.2177429199219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 359.0152282714844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 361.8534851074219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 360.48211669921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 359.9920349121094
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 4 19.0 912.494916918 (10.278181486298042, 11)
loss 359.4419860839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 358.87139892578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 360.1616516113281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 359.03167724609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 360.9391784667969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 360.4319763183594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 359.4067077636719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 358.76123046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 359.39776611328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 359.3580017089844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.8196716308594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 359.95135498046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 358.9610900878906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 359.33160400390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 358.9424133300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 359.4620666503906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 359.0113525390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 359.5296630859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 360.4940490722656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 359.33782958984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 358.6761474609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 358.7769470214844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 360.1439514160156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 359.1843566894531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 360.7585754394531
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 0 21.0 1063.96010023 (13.649658108197247, 9)
loss 359.3463439941406
Current State,action,reward,Response time,Next State:  (9, 13.649658108197247) 3 20.0 1137.6097809 (14.283719188889453, 10)
loss 359.9163818359375
Current State,action,reward,Response time,Next State:  (10, 14.283719188889453) 3 19.0 1135.32732476 (14.677479537099185, 11)
loss 359.1907653808594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 359.50567626953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 361.6188659667969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 360.500244140625
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 0 21.0 1238.24711194 (16.871606159345866, 9)
loss 361.37664794921875
Current State,action,reward,Response time,Next State:  (9, 16.871606159345866) 3 20.0 1306.26286107 (17.534967586021782, 10)
loss 361.6871643066406
Current State,action,reward,Response time,Next State:  (10, 17.534967586021782) 3 19.0 1307.78684385 (17.669285735563751, 11)
loss 359.97625732421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 359.9341735839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 360.5314025878906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 359.1936950683594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 360.22613525390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 359.9039306640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 361.1844787597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 359.66888427734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 360.1011657714844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 360.84100341796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 359.906494140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 359.59039306640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 360.861328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 358.9203186035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 359.7826843261719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 360.4118347167969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 359.33074951171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 361.32403564453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 359.6573181152344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 359.0047912597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 359.3106994628906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 360.627197265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 359.5894775390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 359.1460876464844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 361.8705749511719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 361.12969970703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 359.91375732421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 360.5855712890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 359.7000427246094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 359.2787170410156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 359.8938293457031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 359.0664367675781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 359.87933349609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 358.99066162109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 360.5922546386719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 359.12860107421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 360.30328369140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 359.7862243652344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 360.4172058105469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 359.2820739746094
############ Running episode number: 349  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 359.3584899902344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 360.0272216796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 360.0213928222656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 359.641357421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 358.8892517089844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 360.68182373046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 359.175048828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 359.0030822753906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 360.6937561035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 361.6784362792969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 359.6488952636719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 359.53179931640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 358.9849548339844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.70550537109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 359.9153137207031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 359.3887939453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 358.7730407714844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 359.90667724609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 361.15228271484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 360.40374755859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 359.3678894042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 358.8825988769531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 361.64306640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 359.0144958496094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 360.88861083984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 360.2464294433594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 359.0347900390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 360.33441162109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 358.84490966796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 359.5242614746094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 359.7803039550781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 359.0340270996094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 360.64593505859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 358.86920166015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 359.44049072265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 359.6067199707031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 360.3753967285156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 359.1907653808594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 359.7197570800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 358.975341796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 359.702880859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 358.9914855957031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 359.0037841796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 359.84228515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 358.91436767578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 359.960205078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 358.96124267578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 358.8670654296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 359.9192199707031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 358.8863830566406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 359.5708312988281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 359.5677795410156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 359.7527770996094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 359.3350830078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 359.9510498046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 358.78717041015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 358.9677429199219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 360.4731140136719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 359.686767578125
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 2 19.0 1259.63387034 (17.534967586021782, 11)
loss 359.5384826660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 359.54754638671875
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 4 19.0 1301.78496219 (17.944480812078613, 11)
loss 359.4019775390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 359.031005859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 359.2766418457031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 358.8858337402344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 360.7483215332031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 359.1609802246094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 360.3878479003906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 359.1114196777344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 359.31817626953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 359.1374816894531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 359.0426330566406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 360.07080078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 360.18505859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 360.301513671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 360.1674499511719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 359.70977783203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 360.5368347167969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.96771240234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 358.956298828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 358.72216796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 359.8544616699219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 360.5023193359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 359.74261474609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 359.1707763671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 359.5980224609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 359.0128173828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 359.7718505859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 359.6741027832031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 361.47918701171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 359.123046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 359.0631408691406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 359.80438232421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 359.7579040527344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 358.90032958984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 359.7339172363281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 358.7621154785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 359.02850341796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 359.17083740234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 359.4793395996094
############ Running episode number: 350  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 359.5570068359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 360.35052490234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 359.2951354980469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 359.014892578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 359.7154235839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 359.01373291015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 359.3948669433594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 359.52423095703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 358.9261474609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 359.21234130859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 359.1142272949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 360.2529602050781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 358.8879089355469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 361.19952392578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 359.21270751953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 359.1222229003906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 358.7670593261719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 359.8229064941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 359.07574462890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 359.4052734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 359.7063293457031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 358.8631591796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 358.7764892578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 360.158935546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 358.93218994140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 359.8394775390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 360.54876708984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 359.2563171386719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 360.0522766113281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 359.2453918457031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 359.8252868652344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 359.94500732421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 359.6733093261719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 358.7718811035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 358.92926025390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 359.34136962890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 358.9481506347656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 361.7270812988281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 359.5098571777344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 359.4146728515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 358.9548034667969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 358.73358154296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 359.61297607421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 359.9714660644531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 359.7939758300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 358.74798583984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 359.8541259765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 359.92633056640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 359.1259765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 359.0144958496094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 358.86163330078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 359.34130859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 359.5981140136719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 358.6053161621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 359.1761169433594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 360.553466796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 360.0802001953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 358.9927978515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 358.7430419921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 359.00390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 359.0984802246094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 359.1437072753906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 359.7858581542969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 359.185546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 359.8795471191406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 360.5868225097656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 358.8567810058594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 359.30841064453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 360.34393310546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 359.07781982421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 358.88836669921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 359.88519287109375
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 2 19.0 1385.62570908 (19.08360399753829, 11)
loss 358.6669921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 359.3177490234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 359.68939208984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 358.9384765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 360.3851623535156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 359.2265930175781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 359.7372131347656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 359.3068542480469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 358.85211181640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 359.7702331542969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 359.2869873046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 358.8973083496094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 359.0567932128906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 359.2965393066406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 358.8169250488281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.78326416015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 359.3447265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 359.29290771484375
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 0 21.0 1214.51137704 (15.947547279389703, 9)
loss 359.58941650390625
Current State,action,reward,Response time,Next State:  (9, 15.947547279389703) 3 20.0 1257.89293893 (16.11465619633363, 10)
loss 359.7959289550781
Current State,action,reward,Response time,Next State:  (10, 16.11465619633363) 3 19.0 1232.44771583 (16.147078378791146, 11)
loss 359.3831481933594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 358.74786376953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 358.77703857421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 359.1866149902344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 358.9940490722656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 360.2645263671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 358.8762512207031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 359.5000305175781
############ Running episode number: 351  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.8390197753906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 360.603515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 359.8722839355469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 359.7016906738281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 359.0325927734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 358.786376953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 359.1308288574219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 358.9762878417969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 358.9092102050781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 359.37762451171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 359.1060791015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 360.414794921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 359.78350830078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 359.0503234863281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 359.9350280761719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 358.9838562011719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 358.8974914550781
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 1 20.0 925.789969445 (10.489125480251131, 10)
loss 359.9995422363281
Current State,action,reward,Response time,Next State:  (10, 10.489125480251131) 3 19.0 934.046546974 (10.448897752470936, 11)
loss 358.6366271972656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 359.0107727050781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 359.0807189941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 359.1456604003906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 358.87701416015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 359.9285583496094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 359.40777587890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 359.822509765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 360.099853515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 359.38433837890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 358.76129150390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 358.9405517578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 361.68792724609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 359.28753662109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 359.6150817871094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 359.8710021972656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 360.10791015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 359.08074951171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 360.0506286621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 359.641845703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 360.12786865234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 359.7314147949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 360.8917236328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 359.7290954589844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 359.9835205078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 359.7757263183594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 360.72076416015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 359.0904541015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 359.2451477050781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 359.35546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 359.0363464355469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 359.2389221191406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 359.39337158203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 358.8328857421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 358.8138122558594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 359.1546630859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 359.0377502441406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 360.05389404296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 360.5309753417969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 359.0578308105469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 359.7124328613281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 358.7571105957031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 358.78985595703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 359.97125244140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 358.81707763671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 359.3940124511719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 358.82470703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 359.01068115234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 359.5000305175781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 360.3948974609375
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 2 19.0 1383.38281107 (19.140765783401285, 11)
loss 359.307861328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 360.66094970703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 359.1159362792969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 359.3172912597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 360.1742248535156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 358.9346008300781
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 1 20.0 1354.56874896 (18.375894992990247, 10)
loss 359.5818176269531
Current State,action,reward,Response time,Next State:  (10, 18.375894992990247) 3 19.0 1352.39307459 (17.82724819986867, 11)
loss 359.88714599609375
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 1 20.0 1310.13203606 (17.229782241685768, 10)
loss 360.402099609375
Current State,action,reward,Response time,Next State:  (10, 17.229782241685768) 3 19.0 1291.59856437 (16.84211602880065, 11)
loss 360.782958984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 359.72369384765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 358.98272705078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 359.7650146484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 360.04949951171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 359.3388366699219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 359.9001770019531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 358.9815979003906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 359.0693664550781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 359.5232238769531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 360.9990234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.77276611328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 360.93316650390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 360.96697998046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 359.58966064453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 360.4688415527344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 359.5042419433594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 360.4818420410156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 358.62103271484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 358.9117126464844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 359.86419677734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 359.7333679199219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 360.6737365722656
############ Running episode number: 352  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.9665222167969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 359.3922119140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 359.3055419921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 359.8826904296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 360.5991516113281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 359.6194763183594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 359.25067138671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 359.69757080078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 360.14007568359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 358.610107421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 360.6294860839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 359.4866027832031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 359.2455139160156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 359.3595886230469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 359.3513488769531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 358.91070556640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 359.8724060058594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 358.9930725097656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 358.68365478515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 359.75933837890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 360.06951904296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 359.2013244628906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 358.9036865234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 359.67987060546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 358.8806457519531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 358.9627990722656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 359.3043518066406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 359.3903503417969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 359.2630920410156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 360.9967041015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 358.9968566894531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 359.1518249511719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 358.81842041015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 359.9441833496094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 359.55999755859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 359.25689697265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 359.95086669921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 359.94500732421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 359.6340026855469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 359.500244140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 359.00390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 359.15283203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 358.577880859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 358.8082275390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 358.84637451171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 359.4742126464844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 359.9399719238281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 359.4930114746094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 359.8198547363281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 359.869384765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 359.1140441894531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 359.1646423339844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 359.2111511230469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 359.0175476074219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 359.7823791503906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 359.2438049316406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 359.06146240234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 359.4026184082031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 358.7682800292969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 359.6045227050781
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 1 20.0 1294.6873044 (17.669285735563751, 10)
loss 359.7624206542969
Current State,action,reward,Response time,Next State:  (10, 17.669285735563751) 3 19.0 1314.91162813 (17.944480812078613, 11)
loss 358.7129821777344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 359.0202941894531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 359.7950439453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 359.791259765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 359.49200439453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 359.72747802734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 359.75250244140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 362.57781982421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 358.7801208496094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 360.9159240722656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 360.5394592285156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 361.93426513671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 359.8360290527344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 359.97015380859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 359.23681640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 359.94110107421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 359.7225646972656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 361.0907287597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 359.1974182128906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 359.7640075683594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 359.1385498046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 359.20391845703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 359.03948974609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 358.8717956542969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.97637939453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 358.6296081542969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 359.46923828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 360.5589599609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 360.4575500488281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 360.03125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 360.4405212402344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 359.2283630371094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 359.1273498535156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 358.8777770996094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 359.5957946777344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 359.9420471191406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 359.3542785644531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 359.0794982910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 359.2929992675781
############ Running episode number: 353  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 359.2375793457031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 359.7853088378906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 359.7374267578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 359.3151550292969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 359.62164306640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 359.393798828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 359.2819519042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 359.4295654296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 359.1509704589844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 359.6871032714844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 359.41387939453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 360.1767883300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 358.7431640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 359.3966064453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 359.8139343261719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 360.19171142578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 358.82550048828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 359.1420593261719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 359.4195861816406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 360.87744140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 358.880615234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 359.5826416015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 358.99432373046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 360.1878356933594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 359.89739990234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 360.1144714355469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 359.45086669921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 358.93328857421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 359.9899597167969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 359.61474609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 358.84130859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 359.43524169921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 360.1296691894531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 359.8372802734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 358.778564453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 358.7532958984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 359.1679382324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 359.864501953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 359.52093505859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 359.6302490234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 359.7538757324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 359.7334899902344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 359.2853698730469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 359.199951171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 360.40179443359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 359.7839050292969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 359.3062438964844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 359.1102294921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 359.81622314453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 359.0744323730469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 359.947021484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 359.0353698730469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 361.5933532714844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 359.9012756347656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 358.86553955078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 360.2530517578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 359.7410583496094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 359.5972900390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 359.67852783203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 360.1334228515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 358.8757019042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 358.91912841796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 358.9409484863281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 359.8083190917969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 360.2418212890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 360.26470947265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 359.7253723144531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 360.6212158203125
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 1 20.0 1383.38281107 (19.140765783401285, 10)
loss 358.7340087890625
Current State,action,reward,Response time,Next State:  (10, 19.140765783401285) 3 19.0 1392.96495117 (19.385636054792762, 11)
loss 359.2935485839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 359.99920654296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 360.6425476074219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 359.1275329589844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 359.69232177734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 360.70440673828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 359.0397644042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 358.7380065917969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 359.80841064453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.810546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 360.5887451171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 359.2493896484375
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 1 20.0 1204.52470225 (15.550833128512703, 10)
loss 359.1103210449219
Current State,action,reward,Response time,Next State:  (10, 15.550833128512703) 3 19.0 1202.54023315 (15.446694946204717, 11)
loss 359.9146728515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 359.9625244140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 359.3298645019531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 359.58026123046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 359.14093017578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.8669738769531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 359.29351806640625
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 0 21.0 1213.81868812 (16.017694914042416, 9)
loss 360.1353454589844
Current State,action,reward,Response time,Next State:  (9, 16.017694914042416) 3 20.0 1261.56482143 (15.947547279389703, 10)
loss 360.0346984863281
Current State,action,reward,Response time,Next State:  (10, 15.947547279389703) 3 19.0 1223.58357506 (16.11465619633363, 11)
loss 358.751953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 359.6119079589844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 358.933837890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 359.2292785644531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 359.69427490234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 360.24127197265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 358.78369140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 359.6173095703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 358.6549072265625
############ Running episode number: 354  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 359.9486083984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 359.8338623046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 360.1055603027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 359.3480529785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 358.8100891113281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 359.8385314941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 359.8896789550781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 358.7464294433594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 360.8097229003906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 359.83428955078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 359.6484069824219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 359.89117431640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 360.5787353515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 359.25897216796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 360.43707275390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 360.115234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 360.2662353515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 358.8883972167969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 359.3302001953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 359.122314453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 359.69500732421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 358.89947509765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 358.7754211425781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 358.71514892578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 359.9361267089844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 359.8864440917969
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 2 19.0 913.381595845 (10.30224719189987, 11)
loss 360.42730712890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 360.7119140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 359.8390808105469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 360.4110107421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 359.0180358886719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 359.3788757324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 359.52593994140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 360.08502197265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 358.9365539550781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 358.90814208984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 360.0116271972656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 359.414794921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 360.8608703613281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 360.9455871582031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 358.7838439941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 359.2072448730469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 359.4070739746094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 358.7549133300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 360.9040222167969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 359.4441223144531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 358.90972900390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 359.3306579589844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 359.45147705078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 359.1312255859375
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 0 21.0 1012.73322757 (12.501496275411796, 9)
loss 359.06597900390625
Current State,action,reward,Response time,Next State:  (9, 12.501496275411796) 3 20.0 1077.50917513 (13.168618569876575, 10)
loss 359.93804931640625
Current State,action,reward,Response time,Next State:  (10, 13.168618569876575) 3 19.0 1076.17782493 (13.649658108197247, 11)
loss 361.6867980957031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 359.60205078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 360.5104675292969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 359.0262451171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 359.5113830566406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 359.2760009765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 359.0525207519531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 360.374267578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 360.3125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 359.2360534667969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 359.9443359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 360.81964111328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 359.4466552734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 361.19964599609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 358.86163330078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 360.3283386230469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 359.0340881347656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 360.9571228027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 359.0612487792969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 359.4718933105469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 362.6571044921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 359.912109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 359.8775939941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 359.3721618652344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 360.1222839355469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 359.5802307128906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 359.1867370605469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 358.8183898925781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 359.4075927734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 359.0806579589844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 359.42938232421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 358.5668029785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 361.6632995605469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 359.2601013183594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 359.30267333984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 360.2757873535156
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 4 19.0 1211.18755114 (16.004586266677634, 11)
loss 359.1446533203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 358.962646484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 360.04364013671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 359.76934814453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 359.2486267089844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 359.7835693359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 359.49853515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 361.010009765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 358.7170104980469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 360.4130554199219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 359.5717468261719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 359.9408264160156
############ Running episode number: 355  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 359.3119201660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 361.1805114746094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 359.3076171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 362.37274169921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 360.9996643066406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 360.48321533203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 360.9259033203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 359.1069641113281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 358.9641418457031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 360.0916442871094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 358.94171142578125
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 2 19.0 939.812260006 (10.768325938188134, 11)
loss 361.45904541015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 359.8005065917969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 359.5137939453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 359.8744201660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 360.1009826660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 359.6847229003906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 359.4543151855469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 359.4117126464844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 359.58013916015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 359.6716003417969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 360.1368103027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 359.7652282714844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 359.3155212402344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 360.17864990234375
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 4 19.0 914.701547126 (10.319026962956018, 11)
loss 358.7994384765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 360.0703430175781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 359.4349365234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 360.6788330078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 359.16485595703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 360.7888488769531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 359.7241516113281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 359.1387939453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 359.17376708984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 360.4762878417969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 360.4445495605469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 360.0688171386719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 360.2995910644531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 360.7594909667969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 360.5520935058594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 359.683837890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 359.8824462890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 360.46502685546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 359.0013427734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 360.6317138671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 359.2460632324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 359.72186279296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 360.4488525390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 361.5113525390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 360.4064636230469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 359.3432922363281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 358.6904296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 359.7933044433594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 360.4400939941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 360.7677307128906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 359.6746520996094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 361.4446716308594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 359.1798400878906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 360.7018127441406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 359.33404541015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 360.65655517578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 359.3471374511719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 359.5520935058594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 361.3455810546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 361.2154846191406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 358.9356384277344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 359.0822448730469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 359.7479553222656
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 0 21.0 1383.38281107 (19.140765783401285, 9)
loss 359.71875
Current State,action,reward,Response time,Next State:  (9, 19.140765783401285) 3 20.0 1425.04216908 (19.385636054792762, 10)
loss 359.2469177246094
Current State,action,reward,Response time,Next State:  (10, 19.385636054792762) 3 19.0 1405.95387237 (19.223969507401588, 11)
loss 359.4334411621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 359.35748291015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 360.3102111816406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 358.7568054199219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 359.8990783691406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 359.2756652832031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 359.3728332519531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 360.8502197265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 360.0956115722656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 360.46160888671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 359.1227111816406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 361.94549560546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 358.7573547363281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 359.17431640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 359.08734130859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 359.5019226074219
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 2 19.0 1204.59090422 (15.892373986997768, 11)
loss 359.4052429199219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 360.0823059082031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 360.26171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 359.58905029296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 360.8666687011719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 359.5306396484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 360.8450927734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 360.0010070800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 359.03948974609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 359.5506286621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 360.9664001464844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 359.6568603515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 361.109130859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 361.1607971191406
############ Running episode number: 356  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 359.5870666503906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 358.95843505859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 359.7046203613281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 359.4346923828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 359.2618713378906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 360.152099609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 359.62347412109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 361.3179931640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 359.0626220703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 359.6750793457031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 359.6981506347656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 358.9394836425781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 360.3629150390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 360.3519592285156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 359.9299011230469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 360.6617431640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 359.5105895996094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 360.71661376953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 359.32843017578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 359.31597900390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 359.68231201171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 361.516845703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 358.79486083984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 359.3389892578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 360.43255615234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 360.44873046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 359.2144775390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 361.0430908203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 359.2457275390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 359.0230407714844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 359.32757568359375
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 2 19.0 912.67468196 (10.24826025489064, 11)
loss 359.0356750488281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 359.0879821777344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 359.1261901855469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 359.52801513671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 360.0347595214844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 360.0216369628906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 359.77435302734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 359.31475830078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 360.4776916503906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 360.239013671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 359.9668884277344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 359.6053771972656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 359.0596008300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 361.5195007324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 360.7419738769531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 359.4374084472656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 361.9393310546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 360.0943603515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 359.36309814453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 359.6611022949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 358.5203552246094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 359.6517639160156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 360.0881652832031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 360.08770751953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 360.0029296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 359.5311584472656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 359.04656982421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 360.0071105957031
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 2 19.0 1259.63387034 (17.534967586021782, 11)
loss 359.1573181152344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 359.4554138183594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 359.2239074707031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 358.8764953613281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 358.7676086425781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 359.70947265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 358.7127380371094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 359.1730651855469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 359.4405517578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 359.3303527832031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 359.3858947753906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 358.8601379394531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 359.8366394042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 359.0282897949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 360.16668701171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 359.093017578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 361.5146179199219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 360.7238464355469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 360.2747497558594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.8887023925781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 359.4550476074219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 362.1627197265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 360.34088134765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 360.07977294921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 359.80438232421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 359.33648681640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 359.16748046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 358.892578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.990966796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.9767761230469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 359.612060546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 359.5464172363281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 359.4537048339844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 360.7823486328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 359.0016174316406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 359.2215576171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 360.71142578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 360.5640563964844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 358.8186950683594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 360.2739562988281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 359.7770690917969
############ Running episode number: 357  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 359.3992919921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 359.77691650390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 360.6741027832031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 359.22930908203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 358.8725891113281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 359.7095031738281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 359.8146667480469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 361.194091796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 360.2950134277344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 359.3785400390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 359.3242492675781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 359.9729309082031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 359.81378173828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 359.13018798828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 359.6238708496094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 359.0449523925781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 358.9990539550781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 360.2073059082031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 359.2351989746094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 359.2878723144531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 359.2532043457031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 360.1434020996094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 360.0904846191406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 358.65740966796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 358.7910461425781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 360.1448974609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 360.0799560546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 359.7173156738281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 359.6356506347656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 359.5641174316406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 358.9731140136719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 360.1454772949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 359.6156921386719
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 1 20.0 911.133954163 (10.236991269871366, 10)
loss 359.1044921875
Current State,action,reward,Response time,Next State:  (10, 10.236991269871366) 3 19.0 920.672316722 (10.236272697871373, 11)
loss 359.6177978515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 361.5082092285156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 359.8082580566406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 361.0633850097656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 359.14923095703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 359.5609436035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 359.2409362792969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 360.6038513183594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 359.454833984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 360.6878967285156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 359.5053405761719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 359.78985595703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 359.4569396972656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 359.63897705078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 359.6060791015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 360.69921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 360.7378845214844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 359.4277648925781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 360.45758056640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 359.2959289550781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 360.6382751464844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 359.03546142578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 360.6009826660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 360.8277587890625
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 0 21.0 1238.24711194 (16.871606159345866, 9)
loss 359.7411193847656
Current State,action,reward,Response time,Next State:  (9, 16.871606159345866) 3 20.0 1306.26286107 (17.534967586021782, 10)
loss 360.3565673828125
Current State,action,reward,Response time,Next State:  (10, 17.534967586021782) 3 19.0 1307.78684385 (17.669285735563751, 11)
loss 360.1880187988281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 360.5950622558594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 359.71807861328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 360.1666259765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 359.6548156738281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 358.795654296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 359.712646484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 358.7995910644531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 360.6853942871094
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 4 19.0 1379.54110953 (19.385636054792762, 11)
loss 360.0547180175781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 360.13262939453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 362.2480773925781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 359.2951965332031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 359.813232421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 359.43707275390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 359.92626953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 359.66998291015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 359.29443359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 359.7752685546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 359.7273254394531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 360.01251220703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 361.044189453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 361.3018493652344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 359.4406433105469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 359.4894714355469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.82611083984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 359.0906982421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 360.3682861328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 360.7177734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 359.6969909667969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 360.15167236328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 360.3336486816406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 360.87139892578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 359.8235168457031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 359.2467041015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 359.5370178222656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 360.5893859863281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 359.4116516113281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 359.9675598144531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 359.3226623535156
############ Running episode number: 358  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 360.499267578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 360.5792541503906
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 0 21.0 990.920419923 (11.61852219546234, 9)
loss 360.33551025390625
Current State,action,reward,Response time,Next State:  (9, 11.61852219546234) 3 20.0 1031.28983953 (11.469111876584304, 10)
loss 360.6047668457031
Current State,action,reward,Response time,Next State:  (10, 11.469111876584304) 3 19.0 986.02903554 (11.336751742492702, 11)
loss 360.0461730957031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 359.2669677734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 358.8016357421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 358.540283203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 359.1468505859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 360.01287841796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 359.0289611816406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 359.2671813964844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 359.30230712890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 360.515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 359.4045104980469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 359.7606506347656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 360.010009765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 360.9393005371094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 360.3691101074219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 359.22979736328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 360.9225769042969
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 4 19.0 919.871906942 (10.370942817486826, 11)
loss 359.09130859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 359.6103820800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 361.1361083984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 361.8895568847656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 360.4474182128906
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 0 21.0 913.381595845 (10.30224719189987, 9)
loss 361.22607421875
Current State,action,reward,Response time,Next State:  (9, 10.30224719189987) 3 20.0 962.389338906 (10.278181486298042, 10)
loss 362.6376953125
Current State,action,reward,Response time,Next State:  (10, 10.278181486298042) 3 19.0 922.857214352 (10.268274366284802, 11)
loss 359.2001953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 358.8104248046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 359.9202880859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 358.8859558105469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 359.1308898925781
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 0 21.0 911.133954163 (10.236991269871366, 9)
loss 360.6340637207031
Current State,action,reward,Response time,Next State:  (9, 10.236991269871366) 3 20.0 958.973513426 (10.236272697871373, 10)
loss 359.56201171875
Current State,action,reward,Response time,Next State:  (10, 10.236272697871373) 3 19.0 920.634200723 (10.369891240151098, 11)
loss 360.0634460449219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 359.3765563964844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 361.8776550292969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 361.32098388671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 360.1762390136719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 359.87274169921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 361.2563171386719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 359.0552673339844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 360.8675842285156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 358.9949035644531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 360.93218994140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 360.3367004394531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 360.2806091308594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 359.2423095703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 360.41802978515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 359.22271728515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 361.380859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 362.20819091796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 360.5030822753906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 358.9150390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 359.0677185058594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 359.4617614746094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 358.8413391113281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 361.11016845703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 360.05047607421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 359.9307556152344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 360.50714111328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 359.45849609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 359.7013854980469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 360.4796447753906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 360.6417236328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 358.8559265136719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 359.9602966308594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 359.7022399902344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 359.0160217285156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 360.2157287597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 360.3699035644531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 360.02899169921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 358.798095703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 360.9229431152344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 359.3819580078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 360.5169372558594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 360.0439758300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 362.33526611328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 359.6788635253906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 358.916259765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 360.5601501464844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 359.790771484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 360.8377685546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 359.406494140625
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 1 20.0 1203.91462651 (15.829956988360925, 10)
loss 359.5819396972656
Current State,action,reward,Response time,Next State:  (10, 15.829956988360925) 3 19.0 1217.34610485 (15.892373986997768, 11)
loss 360.3542785644531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 359.8103942871094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.9823913574219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 359.39410400390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 359.7847900390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 360.2080383300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 359.78070068359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 359.87896728515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 359.1172180175781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 359.46514892578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 359.9164733886719
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 4 19.0 1257.77263112 (16.845818065953559, 11)
loss 360.68634033203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 360.6355895996094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 359.5526428222656
############ Running episode number: 359  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 360.017822265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 358.98004150390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 360.7715148925781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 360.0125732421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 361.22137451171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 359.9861145019531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 359.96722412109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 361.2966613769531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 360.15875244140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 358.92401123046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 360.5799560546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 360.0392761230469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 359.5669860839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.8453369140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 359.7500915527344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 359.6812744140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 359.72650146484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 359.4747314453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 359.93603515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 360.9688720703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 359.9101257324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 359.6548156738281
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 2 19.0 916.124940439 (10.42733414151318, 11)
loss 358.8987121582031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 358.9400634765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 359.4239196777344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 359.5080871582031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 359.4676513671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 360.91436767578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 359.84832763671875
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 0 21.0 910.69972028 (10.335411397720526, 9)
loss 359.2275390625
Current State,action,reward,Response time,Next State:  (9, 10.335411397720526) 3 20.0 964.125321415 (10.305649118067803, 10)
loss 360.5156555175781
Current State,action,reward,Response time,Next State:  (10, 10.305649118067803) 3 19.0 924.314209939 (10.24826025489064, 11)
loss 361.1190490722656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 360.5703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 358.8302917480469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 360.84454345703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 361.4619140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 359.8388366699219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 359.5975646972656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 360.37347412109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 360.4218444824219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 359.9473571777344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 359.5192565917969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 359.2657165527344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 361.3146057128906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 360.6676025390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 359.8677978515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 361.4652404785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 359.52252197265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 361.1813659667969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 360.1506652832031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 358.76611328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 361.19036865234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 359.9285583496094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 359.60638427734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 359.704345703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 359.4925231933594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 360.147705078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 359.69500732421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 359.956298828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 361.0990295410156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 359.5966491699219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 359.8983154296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 361.6517333984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 359.5581359863281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 361.7696228027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 360.6827392578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 359.3067321777344
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 4 19.0 1390.09363446 (19.213467265587269, 11)
loss 359.8042297363281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 359.9880676269531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 360.6046142578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 359.38458251953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 360.3653259277344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 359.5874328613281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 359.8774719238281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 361.162841796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 360.61285400390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 360.0150146484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 361.36383056640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 359.0290222167969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 359.103515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 359.2272033691406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 360.2909240722656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 359.07818603515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 360.0476989746094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 359.1012878417969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 359.1327209472656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 361.43292236328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 359.4090881347656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 359.37408447265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 360.03900146484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 359.97015380859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 360.6706848144531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 359.18487548828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 359.81365966796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 361.2951965332031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 360.38116455078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 359.1388244628906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 358.8115539550781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 359.225830078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 359.71978759765625
############ Running episode number: 360  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.8489074707031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 359.07501220703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 359.59588623046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 359.8722229003906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 360.27301025390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 359.92364501953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 360.2304992675781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 359.36065673828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 360.5069885253906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 360.069091796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 359.42901611328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 358.9764404296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 360.98077392578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 359.28375244140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 359.76507568359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 360.1387023925781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 360.7790832519531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 360.7598571777344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 360.21405029296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 359.3402404785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 360.94610595703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 359.5118103027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 361.0401611328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 361.088134765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 359.58245849609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 359.0123596191406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 360.0484924316406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 361.0345458984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 360.1358642578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 360.4393005371094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 359.5988464355469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 359.10211181640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 359.7444152832031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 360.1368408203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 360.5350646972656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 360.673095703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 361.05828857421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 359.8829345703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 359.24627685546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 360.8987731933594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 359.9360656738281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 360.1419372558594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 359.40234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 360.51593017578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 360.4939880371094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 359.56719970703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 361.4280090332031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 359.8943786621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 359.7430725097656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 360.6824035644531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 361.6226806640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 359.0820007324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 360.9724426269531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 360.78948974609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 359.36712646484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 361.0806884765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 359.7668762207031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 360.743408203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 359.4136657714844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 360.4974365234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 359.85430908203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 361.2452392578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 360.2252502441406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 359.9969482421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 360.28289794921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 361.2465515136719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 359.1787109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 360.0329895019531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 361.1447448730469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 359.51385498046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 360.2279052734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 360.751953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 360.3391418457031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 359.29046630859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 361.77642822265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 360.2436218261719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 358.7254943847656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 360.6483154296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 360.3179016113281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 360.19122314453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 359.0155944824219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 360.40985107421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 358.9729919433594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 359.98321533203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 360.0713195800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 359.7488708496094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 359.4136047363281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 361.7373352050781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 362.0730895996094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 360.8578796386719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 360.85906982421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 360.3930358886719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 360.328857421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 359.2225036621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 359.875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 360.32806396484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 358.81768798828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 360.0743713378906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 361.0211181640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 361.8260498046875
############ Running episode number: 361  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 361.71209716796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 360.9172668457031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 359.34429931640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 359.13458251953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 361.471435546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 359.3646545410156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 360.0710144042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 359.28338623046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 360.5023498535156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 359.638427734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 358.4745788574219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 361.0002746582031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 359.4949951171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 359.9870910644531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 359.99468994140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 361.9227294921875
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 4 19.0 925.738299342 (10.553846649940214, 11)
loss 359.8708801269531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 359.92864990234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 360.6776428222656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 360.98016357421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 358.6763610839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 359.20074462890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 362.1846008300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 358.9348449707031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 361.00970458984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 360.76727294921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 359.5882568359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 360.7029724121094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 359.5792236328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 360.6562194824219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 360.3871154785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 359.68719482421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 361.3607177734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 359.5928955078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 360.4425048828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 359.5409851074219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 360.3164367675781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 359.97491455078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 361.9512023925781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 359.9058532714844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 361.894775390625
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 4 19.0 925.376677007 (10.655373370049301, 11)
loss 359.97210693359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 359.33062744140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 359.1056213378906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 358.91180419921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 359.2495422363281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 359.7755432128906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 358.8684387207031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 360.3200988769531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 359.19683837890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 360.0615539550781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 359.3833312988281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 360.25799560546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 360.63397216796875
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 0 21.0 1122.88439768 (14.677479537099185, 9)
loss 359.25982666015625
Current State,action,reward,Response time,Next State:  (9, 14.677479537099185) 1 22.0 1191.41116041 (15.353965082180355, 8)
loss 361.2038879394531
Current State,action,reward,Response time,Next State:  (8, 15.353965082180355) 3 21.0 1268.40282167 (15.836943704090487, 9)
loss 360.7519836425781
Current State,action,reward,Response time,Next State:  (9, 15.836943704090487) 3 20.0 1252.10338759 (16.466876895473597, 10)
loss 360.9937744140625
Current State,action,reward,Response time,Next State:  (10, 16.466876895473597) 3 19.0 1251.130943 (16.871606159345866, 11)
loss 358.442626953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 363.0224914550781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 359.8491516113281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 359.6972351074219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 360.3388977050781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 360.0132751464844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 360.04736328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 359.9526672363281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 359.75823974609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 359.7823181152344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 362.1546325683594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 360.4261779785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 360.7422790527344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 360.3636779785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 361.051025390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 360.94451904296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 359.6624450683594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 359.38397216796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 360.72625732421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 360.3440856933594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 360.9200134277344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 359.37060546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 359.116943359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 363.4132385253906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 360.96575927734375
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 1 20.0 1184.33851965 (15.750501603468638, 10)
loss 361.3708801269531
Current State,action,reward,Response time,Next State:  (10, 15.750501603468638) 3 19.0 1213.1314661 (15.817158911312735, 11)
loss 360.94464111328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 360.7743225097656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 360.6825256347656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 362.50927734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 360.20733642578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 360.66314697265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 360.10430908203125
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 4 19.0 1210.80462626 (16.11465619633363, 11)
loss 360.0899963378906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 359.1715087890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 361.7027282714844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 359.06500244140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 361.87237548828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 359.9957275390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 360.91546630859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 358.874267578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 361.67333984375
############ Running episode number: 362  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 360.12109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 361.4279479980469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 359.6087341308594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 361.9908752441406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 359.9918212890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 359.52154541015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 360.87445068359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 360.48797607421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 358.6960754394531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 359.8965148925781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 359.39520263671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 360.3823547363281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 359.7565612792969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 360.268310546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 360.2237548828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 358.9726867675781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 361.1900329589844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 360.8717346191406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 358.883544921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 363.4215087890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 360.6324768066406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 359.7597961425781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 360.5571594238281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 359.8410339355469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 361.6982421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 358.994873046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 361.5024108886719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 360.0309753417969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 359.0044860839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 359.9630432128906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 359.2076721191406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 361.6053771972656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 360.3104553222656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 359.0517272949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 361.91534423828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 361.0250244140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 359.8255310058594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 359.658935546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 360.65960693359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 360.3095397949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 359.4761657714844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 360.0046691894531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 361.3966369628906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 359.1169128417969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 359.260986328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 359.9261474609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 361.0635070800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 359.1388854980469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 362.02374267578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 358.8883972167969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 360.2115783691406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 360.51416015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 359.1885986328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 363.0533447265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 360.3897399902344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 359.12615966796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 360.1556701660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 360.7921447753906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 361.6900329589844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 359.1714782714844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 359.9100036621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 358.9308776855469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 359.9049987792969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 359.5819396972656
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 1 20.0 1354.73183582 (19.02839494033929, 10)
loss 361.1777648925781
Current State,action,reward,Response time,Next State:  (10, 19.02839494033929) 3 19.0 1387.00434183 (19.286321916040979, 11)
loss 361.24041748046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 360.2598876953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 361.67608642578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 359.3142395019531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 360.62030029296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 360.17633056640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 360.9232177734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 359.9371032714844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 360.9134826660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 360.5565490722656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 362.1337585449219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 359.5150146484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 360.6888122558594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 359.78619384765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 359.3536376953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 359.5306396484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 359.3690490722656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 360.61090087890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 360.1378173828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 361.24102783203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 361.71087646484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 361.4278869628906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 361.8826599121094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 361.7689208984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 361.3633728027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 360.3735046386719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 360.6479187011719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 358.9951171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 359.6513977050781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 358.9095458984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 361.5550537109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 360.5224304199219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 359.7765197753906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 359.5680847167969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 359.0572814941406
############ Running episode number: 363  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 361.7780456542969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 360.1986389160156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 360.43658447265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 359.58001708984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 359.62451171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 362.0657653808594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 359.0108337402344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 361.1185607910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 359.2493896484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 360.17718505859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 360.00927734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 361.57855224609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 359.5336608886719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 362.5971984863281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 360.5462341308594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 359.99359130859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 360.25665283203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 360.23455810546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 361.13043212890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 359.71435546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 362.0320129394531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 360.8843688964844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 360.1142578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 360.194580078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 358.7607727050781
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 2 19.0 914.701547126 (10.319026962956018, 11)
loss 360.5112609863281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 360.1200256347656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 361.5671691894531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 362.7245788574219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 359.4250793457031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 360.12005615234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 361.1294860839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 361.43304443359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 359.7594909667969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 359.7902526855469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 360.3861999511719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 360.2537841796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 359.5350036621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 361.9046936035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 360.2890625
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 2 19.0 919.032945938 (10.546025383098053, 11)
loss 362.2460632324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 360.4400634765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 361.5234680175781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 361.5975646972656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 359.4039611816406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 360.0614013671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 361.4970703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 359.9265441894531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 360.13983154296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 360.182373046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 359.29791259765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 360.287841796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 359.7092590332031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 360.5833435058594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 360.1692810058594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 359.400390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 361.8155822753906
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 4 19.0 1204.9600972 (16.466876895473597, 11)
loss 361.3493347167969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 359.99725341796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 360.35101318359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 361.2689514160156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 359.97918701171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 360.11700439453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 359.852783203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 360.60272216796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 358.75640869140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 359.31292724609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 361.4941711425781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 361.4212646484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 360.5797424316406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 360.0615234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 359.34808349609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 358.7167053222656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 359.9417724609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 359.6842346191406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 360.9716796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 360.7667236328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 360.6457214355469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.8256530761719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 359.3390197753906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 360.552978515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 358.8360595703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 359.4400939941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 359.9842529296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 361.671142578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 360.574951171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 360.4176025390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 359.6894836425781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 359.9291076660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 360.9046325683594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 359.11456298828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 360.8152160644531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 359.1884765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 359.8063659667969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 358.89727783203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 360.1606750488281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 359.2541809082031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 362.4888000488281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 360.8650207519531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 360.7580871582031
############ Running episode number: 364  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 360.54144287109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 359.5865173339844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 358.822998046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 359.4106140136719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 362.0019226074219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 360.9453430175781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 359.1449890136719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 359.8896179199219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 360.64019775390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 359.01934814453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 359.0232238769531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 359.2066345214844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 359.8113708496094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 361.03363037109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 358.9441833496094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 360.58538818359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 359.33935546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 361.1541442871094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 360.2046813964844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 359.7232360839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 359.9612731933594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 359.8204345703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 361.2594909667969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 359.2181396484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 359.2754821777344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 358.9122314453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 361.2734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 361.74505615234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 359.0251770019531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 359.17523193359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 362.367919921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 359.7184753417969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 359.0103454589844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 361.2748107910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 360.1304626464844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 360.52069091796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 359.23138427734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 360.26593017578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 359.1498718261719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 361.79840087890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 359.584716796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 361.1190490722656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 359.7945556640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 358.9550476074219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 362.0508117675781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 360.6702880859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 360.376953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 361.2870788574219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 360.28204345703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 359.66265869140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 360.61334228515625
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 4 19.0 1028.70793389 (13.168618569876575, 11)
loss 362.5072326660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 360.23614501953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 359.75653076171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 359.4363708496094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 359.4437561035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 359.3641662597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 359.5483703613281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 358.973388671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 360.066162109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 360.1789855957031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 359.90966796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 358.8759460449219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 359.1528625488281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 359.71807861328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 359.8481140136719
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 1 20.0 1387.23260634 (19.340464848017284, 10)
loss 361.77178955078125
Current State,action,reward,Response time,Next State:  (10, 19.340464848017284) 3 19.0 1403.55780672 (19.213467265587269, 11)
loss 359.17926025390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 360.90667724609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 359.0774230957031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 360.0673522949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 361.266357421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 359.2456970214844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 360.6496887207031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 361.814453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 360.3656005859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 359.82684326171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 359.91650390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 360.1706848144531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 359.7572937011719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 360.2674865722656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 360.57940673828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 359.83648681640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 359.2900390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 359.16680908203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 359.61920166015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 360.2955322265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 364.5366516113281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 361.2393493652344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 361.0450134277344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 360.14093017578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 361.07879638671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 360.7674865722656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 358.8684997558594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 360.1678771972656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 359.6047058105469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 360.8105163574219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 361.5228576660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 360.10638427734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 358.8547058105469
############ Running episode number: 365  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 360.63323974609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 359.56878662109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 359.87615966796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 360.4700622558594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 360.6728210449219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 359.2696838378906
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 2 19.0 962.898956888 (11.027107764209074, 11)
loss 360.7166748046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 360.6088562011719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 363.3056335449219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 360.1437683105469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 358.90020751953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 359.0028991699219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 360.3862609863281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 361.1893005371094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 359.8381652832031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 359.7795104980469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 360.7077331542969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 360.47271728515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 360.55389404296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 360.6793212890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 359.4520568847656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 360.0986328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 360.9320983886719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 358.6186218261719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 360.888916015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 361.3378601074219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 360.3607177734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 359.3739318847656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 359.9103698730469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 359.4705810546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 360.1040344238281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 360.6407775878906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 359.767333984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 360.1033020019531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 360.8906555175781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 359.5920104980469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 358.9537048339844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 360.1756591796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 360.8546142578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 360.2857360839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 359.47991943359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 360.13543701171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 360.0584716796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 360.4130554199219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 360.0978698730469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 362.247802734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 360.7591247558594
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 0 21.0 963.716106332 (11.670334358779868, 9)
loss 361.1200866699219
Current State,action,reward,Response time,Next State:  (9, 11.670334358779868) 3 20.0 1034.00195058 (11.819721938468785, 10)
loss 359.8468017578125
Current State,action,reward,Response time,Next State:  (10, 11.819721938468785) 3 19.0 1004.62682792 (12.19918626616789, 11)
loss 359.9149475097656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 361.7351989746094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 361.93914794921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 360.194580078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 359.110107421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 360.3658447265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 360.22186279296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 361.5860900878906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 361.08013916015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 359.7074279785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 360.0851745605469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 361.255859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 359.92108154296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 359.5104064941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 360.7749328613281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 360.3655700683594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 361.07000732421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 359.5627746582031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 360.3070373535156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 358.8749084472656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 361.0506591796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 359.9831237792969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 359.4097595214844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 359.5198669433594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 360.4107971191406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 360.2117614746094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 360.90826416015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 360.7316589355469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 359.70965576171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 361.6241760253906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 359.135986328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 359.61767578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 360.68817138671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 359.78466796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 359.91485595703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 360.70941162109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.9951477050781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 360.1149597167969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.9557800292969
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 2 19.0 1211.18755114 (16.004586266677634, 11)
loss 359.470703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 361.2237243652344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 359.01776123046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 360.0810852050781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 359.1141052246094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 359.5301208496094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 360.78509521484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 360.5457763671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 359.9841003417969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 362.2238464355469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 360.0096740722656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 359.25946044921875
############ Running episode number: 366  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.9960021972656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 360.6961975097656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 360.8267517089844
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 1 20.0 982.049698353 (11.469111876584304, 10)
loss 359.7955322265625
Current State,action,reward,Response time,Next State:  (10, 11.469111876584304) 3 19.0 986.02903554 (11.336751742492702, 11)
loss 360.15142822265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 358.98974609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 358.8406066894531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 359.74310302734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 359.20623779296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 360.5027770996094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 360.1029968261719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 359.29852294921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 359.3709716796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 360.24639892578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 361.2998352050781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 359.148193359375
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 0 21.0 925.738299342 (10.553846649940214, 9)
loss 359.12628173828125
Current State,action,reward,Response time,Next State:  (9, 10.553846649940214) 3 20.0 975.559328891 (10.489125480251131, 10)
loss 360.0040588378906
Current State,action,reward,Response time,Next State:  (10, 10.489125480251131) 3 19.0 934.046546974 (10.448897752470936, 11)
loss 359.660400390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 358.7888488769531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 359.8407287597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 360.5342102050781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 359.5737609863281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 359.12139892578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 360.67120361328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 361.6094665527344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 359.98126220703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 359.4888000488281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 361.224853515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 358.730224609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 361.3055114746094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 359.1357727050781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 359.3379211425781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 359.9738464355469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 360.1795959472656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 359.14752197265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 359.3484802246094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 360.3106384277344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 359.4360046386719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 359.1431884765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 359.018798828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 360.6492919921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 360.552001953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 360.7265319824219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 360.9043884277344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 363.6199645996094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 360.5788879394531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 361.1856689453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 359.0860290527344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 359.3595275878906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 359.9056091308594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 360.28515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 360.0113830566406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 359.91717529296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 360.69232177734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 361.6123352050781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 359.5893859863281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 359.90972900390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 360.3945007324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 361.9222106933594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 359.9601745605469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 361.1048889160156
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 2 19.0 1316.32685758 (18.385807405229915, 11)
loss 359.3502197265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 360.26300048828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 361.6779479980469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 360.3948974609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 359.1793212890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 359.514892578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 359.2884826660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 359.8430480957031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 360.1004333496094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 359.61077880859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 359.73834228515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 359.9891662597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 360.13653564453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 359.53662109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 359.1576232910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 360.58416748046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 359.8234558105469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 358.5941162109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 359.5641784667969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 362.2716369628906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 358.9326171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 361.1793518066406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 358.83526611328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 359.6130065917969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 360.722412109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 359.9344177246094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 360.4847717285156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 358.7716064453125
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 0 21.0 1214.51137704 (15.947547279389703, 9)
loss 362.3478088378906
Current State,action,reward,Response time,Next State:  (9, 15.947547279389703) 3 20.0 1257.89293893 (16.11465619633363, 10)
loss 359.3666687011719
Current State,action,reward,Response time,Next State:  (10, 16.11465619633363) 3 19.0 1232.44771583 (16.147078378791146, 11)
loss 361.126220703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 361.1813049316406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 362.03631591796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 360.7072448730469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 358.91802978515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 360.8646545410156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 361.1352233886719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 359.4483337402344
############ Running episode number: 367  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 360.6228332519531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 359.3407287597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 359.4385070800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 361.05419921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 361.62432861328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 360.0292663574219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 359.86529541015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 361.5746154785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 361.2422180175781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 361.3340148925781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 361.61090087890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 359.8538513183594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 360.4306335449219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 360.5052185058594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 360.33331298828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 359.55633544921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 358.9985046386719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 358.9964904785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 359.9925842285156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 360.3694152832031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 359.93475341796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 361.07904052734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 359.8013610839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 358.84442138671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 360.0666198730469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 360.03057861328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 359.9555969238281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 359.7615966796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 360.9019470214844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 359.8727111816406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 360.89544677734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 359.61822509765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 360.0662536621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 360.0595703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 361.234130859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 362.0754699707031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 359.2730712890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 360.6790466308594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 359.58135986328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 358.8973083496094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 361.17620849609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 360.2032470703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 360.097900390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 360.4478759765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 359.7143859863281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 360.8844299316406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 360.527099609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 361.80706787109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 359.42999267578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 361.3230895996094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 360.3654479980469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 359.8053283691406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 359.5516357421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 359.71343994140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 360.19757080078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 359.78826904296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 361.4979248046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 362.9673156738281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 360.63671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 360.8910217285156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 361.1947021484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 361.6605529785156
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 1 20.0 1316.32685758 (18.385807405229915, 10)
loss 359.37799072265625
Current State,action,reward,Response time,Next State:  (10, 18.385807405229915) 3 19.0 1352.9188695 (18.671267839956315, 11)
loss 360.1104736328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 360.8642578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 360.15826416015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 359.12506103515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 358.845947265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 359.2355041503906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 360.33038330078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 361.6110534667969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 360.29595947265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 358.6497802734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 360.716552734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 361.0135192871094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 360.4694519042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 361.3636169433594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 360.8819580078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 361.337890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 359.546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 359.2044677734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 360.22479248046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 359.013671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 361.3689880371094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 360.3831787109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 360.6328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 360.9720458984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 360.2947998046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 359.4891662597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 361.4199523925781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 360.0646667480469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 360.4792175292969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 360.19989013671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 359.2299499511719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 362.0481262207031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 360.2229309082031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 359.5489196777344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 360.5967102050781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 362.47564697265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 362.1947326660156
############ Running episode number: 368  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 359.20208740234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 361.2200927734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 360.36004638671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 359.5467529296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 359.7065734863281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 359.6167297363281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 359.84710693359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 359.3388977050781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 359.79840087890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 358.72125244140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 360.6413269042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 361.3066711425781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 359.37188720703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 359.4017639160156
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 4 19.0 930.602776506 (10.58735855349979, 11)
loss 360.466064453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 360.79998779296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 359.9675598144531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 362.0282287597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 361.9248962402344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 361.19464111328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 360.4364929199219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 360.694580078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 359.0987548828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 360.0901184082031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 361.34344482421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 358.7573547363281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 361.0500793457031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 360.1324157714844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 359.7757873535156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 358.98101806640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 359.0798645019531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 359.6952819824219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 358.802001953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 359.9139099121094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 360.9205017089844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 358.8863525390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 360.51849365234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.8825988769531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 359.60980224609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 359.9542236328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 359.72271728515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 359.73333740234375
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 0 21.0 931.154858096 (10.624473674922116, 9)
loss 359.5074157714844
Current State,action,reward,Response time,Next State:  (9, 10.624473674922116) 3 20.0 979.256305105 (10.771376986314287, 10)
loss 361.7457275390625
Current State,action,reward,Response time,Next State:  (10, 10.771376986314287) 3 19.0 949.018321829 (10.924797168745895, 11)
loss 361.4424743652344
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 4 19.0 945.391786838 (11.039747673816453, 11)
loss 360.474609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 360.678466796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 359.2322692871094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 361.88397216796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 360.14453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 359.3041076660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 360.11376953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 361.5156555175781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 359.8319396972656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 358.6390686035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 359.6711120605469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 359.8719482421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 359.9662170410156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 359.9679870605469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 360.53289794921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 359.0424499511719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 359.0969543457031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 359.58477783203125
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 2 19.0 1339.64749699 (18.671267839956315, 11)
loss 360.02947998046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 359.860107421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 359.09649658203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 360.4625549316406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 359.14849853515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 360.34722900390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 360.7195129394531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 359.69854736328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 358.8983459472656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 359.5451965332031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 360.4065246582031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 361.0628967285156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 360.3232421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 360.307373046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 360.10174560546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 360.6191711425781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 361.0497741699219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 359.6369323730469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 359.68487548828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 359.03240966796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 360.4494323730469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 360.768310546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 360.30908203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 359.0087890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 360.1624755859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 360.0012512207031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 358.8721008300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 360.746826171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 359.6607666015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 360.9397888183594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 359.4098205566406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 359.8447570800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 362.2608337402344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 360.9526672363281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 359.6873779296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 359.6683349609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 360.9398193359375
############ Running episode number: 369  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 359.067138671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 360.0421447753906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 360.15179443359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 360.1982727050781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 360.1320495605469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 361.2629699707031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 360.1758117675781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 361.5011901855469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 361.5295104980469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 360.0037536621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 360.2105407714844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 359.07159423828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 361.0710144042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 360.619873046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 359.09490966796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 359.0860595703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 359.66143798828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 360.2174377441406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 360.9651794433594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 360.0997009277344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 359.7329406738281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 360.1432189941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 360.8153991699219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 358.9590759277344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 358.6448669433594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 361.0129699707031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 359.77850341796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 360.0771789550781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 360.6375427246094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 360.6177978515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 360.2450866699219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 359.4650573730469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 358.97332763671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 359.73846435546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 359.392822265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 360.3155212402344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 362.6782531738281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 359.66143798828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 361.261962890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 359.2695007324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 358.9214782714844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 359.5556335449219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 358.7469787597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 359.0838623046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 361.16241455078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 359.6502685546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 360.01922607421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 360.78826904296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 362.9205627441406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 359.0887756347656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 359.2367858886719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 358.7914123535156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 361.92327880859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 359.6866760253906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 359.75164794921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 360.9872131347656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 359.0047607421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 361.1878967285156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 361.67510986328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 360.44049072265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 360.1678466796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 360.823974609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 358.8968505859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 359.5135498046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 359.60528564453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 361.1490783691406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 359.68304443359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 361.4878234863281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 359.143798828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 360.2565612792969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 359.7434997558594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 360.3004455566406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 359.42291259765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 359.8572998046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 360.71807861328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 360.27978515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 359.2001647949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 358.928466796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 359.4490051269531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 358.80120849609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 359.50408935546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 359.55474853515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 358.90972900390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 359.4259948730469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 360.0247497558594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 361.1261291503906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 359.76904296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 359.8216552734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 359.55950927734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 359.35931396484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 360.3876037597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 360.40643310546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 359.1911926269531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 359.087890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 359.4791259765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 359.43804931640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 359.8125915527344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 360.3681640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 359.7874450683594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 359.0030822753906
############ Running episode number: 370  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 359.5654296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 359.1737365722656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 360.05352783203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 360.2848815917969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 360.94696044921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 360.2863464355469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 359.3984680175781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 359.97003173828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 359.82135009765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 360.35931396484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 360.3347473144531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 360.55029296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 359.2287292480469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 361.359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 360.17681884765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 358.7467956542969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 360.95947265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 358.7474060058594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 359.3762512207031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 359.0163269042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 362.4409484863281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 360.0803527832031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 360.10589599609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 359.5687255859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 360.05841064453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 359.077392578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 359.99505615234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 361.3817138671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 358.6968994140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 359.1850280761719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 359.5560607910156
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 1 20.0 912.67468196 (10.24826025489064, 10)
loss 359.4085693359375
Current State,action,reward,Response time,Next State:  (10, 10.24826025489064) 3 19.0 921.2700698 (10.276491935146446, 11)
loss 359.0493469238281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 359.95562744140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 358.7464904785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 359.5026550292969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 359.7462158203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 361.0825500488281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 360.828857421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 361.67242431640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 359.1213073730469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 360.1183166503906
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 4 19.0 931.154858096 (10.624473674922116, 11)
loss 358.8662109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 359.6803283691406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 359.21112060546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 360.75537109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 359.00457763671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 359.9317932128906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 358.8539123535156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 360.78857421875
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 1 20.0 1012.73322757 (12.501496275411796, 10)
loss 360.29443359375
Current State,action,reward,Response time,Next State:  (10, 12.501496275411796) 3 19.0 1040.79092857 (13.168618569876575, 11)
loss 359.0992126464844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 359.3630065917969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 361.64593505859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 360.4296569824219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 361.1217956542969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 358.7846984863281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 360.2996520996094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 359.9414367675781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 359.1717529296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 359.529296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 358.99627685546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 360.4181213378906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 358.9248962402344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 359.63104248046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 359.6006164550781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 359.163330078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 359.32135009765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 359.3272705078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 361.17547607421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 359.53717041015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 359.2018127441406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 360.0148010253906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 358.7828674316406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 360.29949951171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 360.63653564453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 360.0906677246094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 358.85821533203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 360.35382080078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 359.3006591796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 359.5397033691406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 359.74505615234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 358.6993713378906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 358.94097900390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 359.8799133300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.8232421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 359.2706604003906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 359.7164611816406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 360.0880126953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 360.139892578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 361.1828918457031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 360.3559875488281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 360.0052185058594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 360.506103515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 359.3561096191406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 360.84710693359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 360.6475524902344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 360.1171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 359.67266845703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 360.7682189941406
############ Running episode number: 371  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.774658203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 359.48345947265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 360.47076416015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 360.4569396972656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 360.0036315917969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 359.35345458984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 361.06829833984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 358.93951416015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 359.97320556640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 359.5600280761719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 360.08099365234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 360.4855041503906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 360.6077575683594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 359.310302734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 360.9325866699219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 360.5262756347656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 359.4541320800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 358.72613525390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 360.0926513671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 359.8853454589844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 359.56744384765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 360.106201171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 359.4002380371094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 360.73834228515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 359.8924865722656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 359.65460205078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 361.1531066894531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 359.72442626953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 360.2698974609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 359.27679443359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 359.4557800292969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 359.18182373046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 360.12286376953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 360.5357666015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 359.3509216308594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 360.9324645996094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 360.5072021484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 359.3911437988281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 359.1036071777344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 359.62017822265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 361.0334167480469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 359.30377197265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 359.9725341796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 359.5923767089844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 360.92010498046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 359.0926208496094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 359.7275695800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 360.15142822265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 359.0560607910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 359.2344970703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 359.2391052246094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 358.9093017578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 359.75909423828125
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 4 19.0 1089.37925646 (14.283719188889453, 11)
loss 358.8645935058594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 358.7970886230469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 359.7228698730469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 359.7598571777344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 359.7447509765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 359.4102783203125
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 1 20.0 1259.63387034 (17.534967586021782, 10)
loss 360.0421142578125
Current State,action,reward,Response time,Next State:  (10, 17.534967586021782) 3 19.0 1307.78684385 (17.669285735563751, 11)
loss 360.7525634765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 359.6216735839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 359.5634460449219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 360.3870849609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 360.3276672363281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 358.94769287109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 360.1856994628906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 360.1287536621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 360.4677734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 359.2008972167969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 358.99493408203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 360.0519714355469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 361.4368591308594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 359.592041015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 359.9830322265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 359.336181640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 360.3428955078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 360.51385498046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 359.1310119628906
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 1 20.0 1226.10492247 (15.950694610794756, 10)
loss 359.9025573730469
Current State,action,reward,Response time,Next State:  (10, 15.950694610794756) 3 19.0 1223.7505224 (15.828704162850809, 11)
loss 358.5661315917969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 361.1915283203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 360.9452209472656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 360.0283508300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 359.1759948730469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 360.7620849609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 359.5059509277344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 359.6886291503906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.9826354980469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 360.87652587890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 360.5245666503906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 359.435546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 359.8056335449219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 359.3154296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 358.835205078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 359.5121765136719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 360.43304443359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 360.0473327636719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 359.8799133300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 359.1535949707031
############ Running episode number: 372  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 359.6413879394531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 359.62469482421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 359.4425964355469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 358.8751220703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 360.6903381347656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 359.5386657714844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 359.65655517578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 358.9268798828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 359.2942199707031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 358.96795654296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 359.343017578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 359.7995300292969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 361.0714416503906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.939453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 360.1277770996094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 359.07391357421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 359.9336853027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 358.8537902832031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 361.26824951171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 359.63580322265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 359.63671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 359.3861389160156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 359.2820739746094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 359.3277282714844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 359.9460754394531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 359.8752136230469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 358.9617919921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 360.4098205566406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 358.9914855957031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 358.93426513671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 359.4595947265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 360.0308837890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 358.93206787109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 359.91619873046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 359.0948486328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 359.72308349609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 359.52459716796875
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 2 19.0 913.272125304 (10.333617326102203, 11)
loss 360.8548889160156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 358.86492919921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 359.65399169921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 358.69012451171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 359.732177734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 359.865478515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 360.00555419921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 359.00634765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 359.465576171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 359.5013427734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 358.78851318359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 359.231689453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 360.04156494140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 359.1873474121094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 360.8758239746094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 359.69757080078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 358.93994140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 359.6724853515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 360.8385925292969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 360.04742431640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 361.5768737792969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 359.00604248046875
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 4 19.0 1259.63387034 (17.534967586021782, 11)
loss 359.7757263183594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 360.2846984863281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 359.649169921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 359.1416320800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 359.2564392089844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 359.347412109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 359.1988525390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 360.4342956542969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 360.1281433105469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 359.27960205078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 359.49072265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 359.013427734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 359.3475646972656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 359.7567138671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 360.80731201171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 360.4285888671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 359.7991943359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 359.9353332519531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 358.7171630859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 359.6690979003906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 359.6639709472656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 359.65142822265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 359.22662353515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 360.3149719238281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 359.88201904296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 359.45050048828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.6949157714844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 360.12335205078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 360.2220764160156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 359.2310485839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 359.533447265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 360.1631164550781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 359.3678894042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 359.4534912109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 359.3663635253906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 359.5592346191406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 358.7527160644531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 361.006591796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 359.61541748046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 359.86773681640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 360.3543395996094
############ Running episode number: 373  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 359.2048645019531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 360.2134704589844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 359.5547180175781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 360.2018127441406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 359.33441162109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 358.8614501953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 359.5158996582031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 359.1039733886719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 359.73590087890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 360.35968017578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 359.70501708984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 358.97906494140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 360.8047180175781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 360.2386779785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 359.9378662109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 358.77508544921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 358.9316101074219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 359.2516174316406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 358.87420654296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 359.74383544921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 358.6028137207031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 360.1755065917969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 358.9467468261719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 359.0126953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 360.1602783203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 359.9129638671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 359.4764099121094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 358.6792907714844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 359.94427490234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 359.3454284667969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 359.1685791015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 359.9363708496094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 360.3971252441406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 360.367431640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 359.3537292480469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 358.8388977050781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 360.6950378417969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.9496765136719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 358.73040771484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 359.89935302734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 359.8664245605469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 359.650634765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 359.833251953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 358.88006591796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 361.0281066894531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 359.5557556152344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 359.0295715332031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 360.04217529296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 359.90020751953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 360.10064697265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 359.49383544921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 358.5945129394531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 359.521728515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 359.43353271484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 360.630126953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 359.9696350097656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 358.75677490234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 359.990966796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 361.0441589355469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 358.82373046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 359.06671142578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 359.60235595703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 359.3353271484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 359.4830017089844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 360.7204284667969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 359.1174621582031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 358.71240234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 359.31964111328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 359.7113342285156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 359.6753234863281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 360.2174377441406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 360.76190185546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 359.3373718261719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 361.19989013671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 360.7944641113281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 360.2154235839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 359.445556640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 360.04156494140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.9735107421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 360.1548767089844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 359.7228088378906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 359.81390380859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 359.22021484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 358.9764709472656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 359.94708251953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 359.9750671386719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 359.1622314453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 359.0739440917969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.7645263671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 359.4234313964844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 361.39239501953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 359.6531066894531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 359.02008056640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 360.077880859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 360.5302429199219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 360.1232604980469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 360.07525634765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 359.23297119140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 360.5361022949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 360.08013916015625
############ Running episode number: 374  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 359.5906677246094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 359.16351318359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 359.06732177734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 359.9100341796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 360.2886047363281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 359.7236022949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 360.0104675292969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 359.12371826171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 358.8984680175781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 360.10662841796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 360.2632751464844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 358.8747253417969
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 4 19.0 937.12351295 (10.772009508959538, 11)
loss 359.9981689453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 359.53546142578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 360.241943359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 359.3636779785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 358.9407653808594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 359.3198547363281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 360.1133117675781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 358.94256591796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 359.7001037597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 361.0719909667969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 359.93853759765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 360.3037109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 359.2804870605469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 359.9142150878906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 358.9941711425781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 360.57928466796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 361.3098449707031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 359.6030578613281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 360.118408203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 359.4490051269531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 360.5976867675781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 359.57806396484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 359.74969482421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 358.78631591796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 359.1446838378906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 359.1136474609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 361.3713073730469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 360.167236328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 359.0653076171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 359.1683654785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 360.14263916015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 359.15093994140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 358.7434997558594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 360.8845520019531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 362.00494384765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 360.10345458984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 358.95263671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 359.71697998046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 359.46405029296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 358.7456359863281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 359.9381408691406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 360.0478515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 360.7481689453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 359.86920166015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 358.9463195800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 359.9533386230469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 359.2413024902344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 360.8892517089844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 359.5587158203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 360.57562255859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 358.8122253417969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 360.23663330078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 360.0510559082031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 359.091796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 359.4407043457031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 361.59063720703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 360.5941162109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 359.60662841796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 359.6443176269531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 360.1052551269531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 359.1241455078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 359.347900390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 359.5030822753906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 359.59307861328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 359.5647888183594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 360.4564514160156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.7988586425781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 360.0762939453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 360.3537902832031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 359.0569763183594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 359.5584411621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 360.31011962890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 360.626708984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 359.8384094238281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 359.77911376953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 359.3410949707031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 360.717529296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 358.6905822753906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 358.91143798828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 359.3500061035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 360.8370666503906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 360.4569091796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 358.4337158203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 358.81134033203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 358.67626953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 360.6997985839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 358.7168884277344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 359.2471618652344
############ Running episode number: 375  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 359.792236328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 359.0275573730469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 359.1249084472656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 359.3392639160156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 359.72320556640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 360.2153015136719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 360.730224609375
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 1 20.0 950.798097136 (10.995673623987257, 10)
loss 358.803466796875
Current State,action,reward,Response time,Next State:  (10, 10.995673623987257) 3 19.0 960.915933313 (10.931193889570471, 11)
loss 359.7098083496094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 360.2217102050781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 358.834716796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 359.70556640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 359.2403259277344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.8701171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 360.9322204589844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 359.7298889160156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 358.6348571777344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 359.2708740234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 360.095458984375
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 1 20.0 920.244245637 (10.433149880183072, 10)
loss 359.2845764160156
Current State,action,reward,Response time,Next State:  (10, 10.433149880183072) 3 19.0 931.077372094 (10.44185150623065, 11)
loss 359.2728271484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 359.85687255859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 358.85321044921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 359.70562744140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 359.6737060546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 359.35748291015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 359.60113525390625
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 4 19.0 912.494916918 (10.278181486298042, 11)
loss 360.302734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 359.6990051269531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 361.001220703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 359.0527038574219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 359.88134765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 360.34136962890625
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 2 19.0 911.133954163 (10.236991269871366, 11)
loss 360.36163330078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 359.8956604003906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 359.5783386230469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 358.7674865722656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 360.59381103515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 360.0444030761719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 359.2003479003906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 359.53057861328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 359.6344909667969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 360.9293518066406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 359.23992919921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 359.29376220703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 361.1185607910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 359.1773681640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 358.9582214355469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 359.33251953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 360.4584045410156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 358.8218994140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 358.72296142578125
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 2 19.0 1063.96010023 (13.649658108197247, 11)
loss 359.9078369140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 359.855224609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 359.1877746582031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 358.9414978027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 359.2419738769531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 359.49090576171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 359.76971435546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 359.2783508300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 358.96209716796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 358.6229553222656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 359.71240234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 359.113525390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 359.2569885253906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 359.7396240234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 359.8484802246094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 358.9639587402344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 360.3934020996094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 358.8586730957031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 359.29052734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 358.8843994140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 359.34130859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 361.0712585449219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 360.5177001953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 359.7907409667969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 359.2110595703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 359.1131286621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 359.4259948730469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 359.04296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 361.2572326660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 359.0050354003906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 360.73309326171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 359.8112487792969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 360.86932373046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 359.12109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 360.13677978515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 360.13525390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 359.83941650390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 361.3575439453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 359.7854919433594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 360.57171630859375
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 1 20.0 1219.63501821 (16.147078378791146, 10)
loss 359.13885498046875
Current State,action,reward,Response time,Next State:  (10, 16.147078378791146) 3 19.0 1234.16752106 (16.229253414601111, 11)
loss 359.7001037597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 358.4886169433594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 360.0850524902344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 359.043212890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 358.8962707519531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 359.6197814941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 358.8802490234375
############ Running episode number: 376  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.9855041503906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 360.73828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 359.1296081542969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 359.62200927734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 359.5467529296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 359.1661376953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 358.89862060546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 360.0572814941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 361.08111572265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 359.3939208984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 360.188720703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 360.6988220214844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 359.0987548828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 359.2992858886719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 359.7079162597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 360.3382568359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 358.6687927246094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 358.75634765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 358.91748046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 359.8667907714844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 359.2642517089844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 358.9510498046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 359.681884765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 359.1722412109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 359.58807373046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 359.72015380859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 359.67022705078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 358.9165344238281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 359.1399841308594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 359.0335388183594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 359.8117980957031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 360.01190185546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 359.8966369628906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 359.4485778808594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 359.90142822265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 358.9790954589844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 359.3192138671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 359.3119812011719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 359.46337890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 358.82830810546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 360.2723388671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 358.7502136230469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 359.2701110839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 359.0022888183594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 358.9238586425781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 359.1824951171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 359.3291015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 358.59954833984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 359.85394287109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 359.7395324707031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 359.00836181640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 359.11102294921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 359.27020263671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 359.182861328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 359.71527099609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 359.4969177246094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 359.4706115722656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 358.9564208984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 359.360595703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 359.93548583984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 358.83221435546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 358.83843994140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 358.78216552734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 358.58984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 359.6476745605469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 359.2085266113281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 359.3562927246094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 358.8453063964844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 358.83245849609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 358.92584228515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 359.7756042480469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 359.100341796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 359.1295166015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 358.9482116699219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 360.4855041503906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 360.025634765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 359.15765380859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 358.8052062988281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 359.0639953613281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 359.1424255371094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 360.4486389160156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 359.29296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 359.0402526855469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 359.8040771484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 360.0978088378906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 359.61676025390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 359.514404296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.97625732421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 359.61199951171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 359.30194091796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 359.5530090332031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 359.546142578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 359.3994140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 359.7105712890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 359.1870422363281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 359.85760498046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 359.5062561035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 359.5684814453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 358.8440246582031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 358.8353271484375
############ Running episode number: 377  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 359.84454345703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 358.96234130859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 358.8942565917969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 359.10009765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 358.84344482421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 359.1502380371094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 360.57891845703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 358.6763916015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 359.73828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 359.6502380371094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 359.22760009765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 359.46484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 359.1361999511719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 359.27935791015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 360.07415771484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 359.247314453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 359.6908874511719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 359.68109130859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 360.4992980957031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 359.71295166015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 358.8553771972656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 359.5293273925781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 359.3388671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 359.28814697265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 359.4033508300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 359.2993469238281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 358.7609558105469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 359.4730224609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 358.9684143066406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 359.27789306640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 359.04052734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 359.1904296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 360.15087890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 359.1181945800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 358.791748046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 359.13916015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 358.9836120605469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 359.26922607421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 360.4449768066406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 359.1600036621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 358.8473815917969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 358.82293701171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 358.92486572265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 359.6463623046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 359.1597900390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 359.32904052734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 359.4933776855469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 359.1199951171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 359.51629638671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 359.1510314941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 359.0462646484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 358.745849609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 358.8770751953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 359.2877502441406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 359.1976623535156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 359.25311279296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 359.8450012207031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 359.2279357910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 358.8179016113281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 359.8711853027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 359.78057861328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 359.0107421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 359.62860107421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 359.6106872558594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 359.6339416503906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 358.8351745605469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 358.90411376953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 359.0904541015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 359.7652587890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 359.1669921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 358.95184326171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 359.9287109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 359.4985046386719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 360.0665283203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 358.8690185546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 358.95733642578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 359.63677978515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 359.36602783203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.9987487792969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 359.34100341796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 359.0102233886719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 360.1155090332031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 358.98773193359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 360.13214111328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 359.3694152832031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 359.005126953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 359.9201965332031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.8839416503906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.7822570800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 359.7793884277344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 359.6592102050781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 359.4722595214844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 359.12982177734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 359.9472961425781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 359.0737609863281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 358.9500732421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 359.3004455566406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 359.25677490234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 359.0934143066406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 359.35821533203125
############ Running episode number: 378  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 359.4913330078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 359.69769287109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 359.12530517578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 359.1167907714844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 359.6497802734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 360.6231689453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 359.8664855957031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 358.6400451660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 358.9161376953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 359.0085144042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 359.06353759765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 359.35980224609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 358.9766845703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 359.8228454589844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 358.8797607421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 359.2181396484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 359.9803466796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 359.05548095703125
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 1 20.0 922.369964659 (10.448897752470936, 10)
loss 359.00433349609375
Current State,action,reward,Response time,Next State:  (10, 10.448897752470936) 3 19.0 931.912703681 (10.433149880183072, 11)
loss 359.6694030761719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 358.8651123046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 359.88787841796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 359.36981201171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 359.1231689453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 358.8594970703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 359.59674072265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 358.8199768066406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 358.8583679199219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 358.93310546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 358.9910888671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 359.23126220703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 359.8086242675781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 360.137451171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 358.8388671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 359.7236328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 359.6336669921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 358.8339538574219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 359.1841735839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 359.00177001953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 358.70281982421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 359.7089538574219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 359.30352783203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 360.0367431640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 359.96624755859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 359.4250183105469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 359.0235290527344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 359.111328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 359.22833251953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 359.394287109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 359.1037902832031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 359.365478515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 359.05999755859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 359.3528137207031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 359.52978515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 359.1039123535156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 358.979736328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 359.056884765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 359.58819580078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 359.2721252441406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 359.2494812011719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 358.8470458984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 359.1648254394531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 359.3089599609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 358.903564453125
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 4 19.0 1354.73183582 (19.02839494033929, 11)
loss 360.1850280761719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 358.94940185546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 359.45928955078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 358.7856750488281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 359.74859619140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 359.2730712890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 358.900390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 359.2767028808594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 358.9156494140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 359.02886962890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 358.9071960449219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 358.98419189453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 359.2825622558594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 359.38323974609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 359.4429931640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 359.666259765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 358.9075012207031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 359.57855224609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 358.91839599609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 359.23419189453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 359.2185974121094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 359.0433654785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 358.928466796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 359.54534912109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.8918762207031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 359.490966796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 358.6927490234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 359.928955078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 359.453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 359.0628662109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 359.39056396484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 359.2046203613281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 359.2539367675781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 360.0982360839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 359.33416748046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 359.7205505371094
############ Running episode number: 379  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.8752136230469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 359.046142578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 359.5093078613281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 358.86627197265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 358.9930419921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 359.3614807128906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 358.9339294433594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 359.5224304199219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 359.0966796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 358.9520263671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 359.3167419433594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 358.9054870605469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 359.27606201171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 359.28155517578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 359.2701110839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 359.72442626953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 359.1573181152344
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 4 19.0 925.789969445 (10.489125480251131, 11)
loss 358.8540954589844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 359.3221740722656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 359.7830505371094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 359.9656677246094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 359.5871276855469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 359.1095275878906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 359.69952392578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 358.9329528808594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 359.1316833496094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 358.87322998046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 359.4844055175781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 358.6304626464844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 358.9483947753906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 359.31414794921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 359.67535400390625
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 0 21.0 909.642131904 (10.276491935146446, 9)
loss 359.0747375488281
Current State,action,reward,Response time,Next State:  (9, 10.276491935146446) 3 20.0 961.041178317 (10.236991269871366, 10)
loss 359.160400390625
Current State,action,reward,Response time,Next State:  (10, 10.236991269871366) 3 19.0 920.672316722 (10.236272697871373, 11)
loss 359.1446838378906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 359.5402526855469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 359.35546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 359.5838623046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 358.9575500488281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 358.80755615234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 358.9498596191406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 358.8839416503906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 359.05389404296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 359.3420715332031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 359.1611633300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 358.6595764160156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 358.8321533203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 360.6767883300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 359.2223205566406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 359.7223205566406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 358.85992431640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 359.2054443359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 358.9491882324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 358.8197326660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 359.53424072265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 358.92962646484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 359.49432373046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 360.0508728027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 359.009033203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 359.0167236328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 358.9732360839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 358.9754638671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 359.4560852050781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 359.1457824707031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 359.18316650390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 359.3867492675781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 359.7461853027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 358.9801025390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 359.1700439453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 359.4412841796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 359.4748840332031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 359.34173583984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 360.5240173339844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 359.38751220703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 359.5644836425781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 359.30621337890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 358.8896789550781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 359.8144836425781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 360.8448791503906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 359.4250793457031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 359.26361083984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 358.8987121582031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 359.17108154296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 359.86859130859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 360.0108947753906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 359.06036376953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 358.8268127441406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.87701416015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.58013916015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 359.25152587890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 359.3956604003906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 358.88031005859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 359.4321594238281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 359.2210998535156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 358.6920166015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 360.56292724609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 359.103515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 359.14434814453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 359.5891418457031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 359.2334899902344
############ Running episode number: 380  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 359.8262634277344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 358.9455261230469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 358.9043884277344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 358.69708251953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 359.45330810546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 359.1523742675781
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 1 20.0 962.898956888 (11.027107764209074, 10)
loss 359.2667236328125
Current State,action,reward,Response time,Next State:  (10, 11.027107764209074) 3 19.0 962.583328739 (10.995673623987257, 11)
loss 359.15301513671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 359.35772705078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 358.9273986816406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 359.85028076171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 359.38140869140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 359.3123474121094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.8602294921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 359.2135009765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 359.06109619140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 359.6241760253906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 358.8368835449219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 360.2232666015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 359.0364685058594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 359.2502136230469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 359.1939392089844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 359.5135192871094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 358.8245544433594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 359.1911315917969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 359.5526123046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 358.8487548828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 359.30853271484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 359.34893798828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 358.8245544433594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 359.6140441894531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 359.0660705566406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 358.9426574707031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 359.3727111816406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 358.8414001464844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 358.8480224609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 359.134521484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 359.8573303222656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 359.1762390136719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 358.8974609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 360.010009765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 360.2067565917969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 359.281005859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 359.294189453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 359.29095458984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 358.92413330078125
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 2 19.0 951.466016946 (11.271571944085663, 11)
loss 359.06585693359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 360.0652160644531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 359.5614929199219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 358.7593994140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 359.8337097167969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 360.2920837402344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 359.4306335449219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 358.806884765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 358.7057800292969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 359.2850646972656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 359.73944091796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 359.0344543457031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 359.1448974609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 358.7320556640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 359.34368896484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 358.8841247558594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 359.60784912109375
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 0 21.0 1339.64749699 (18.671267839956315, 9)
loss 360.0498046875
Current State,action,reward,Response time,Next State:  (9, 18.671267839956315) 3 20.0 1400.46626871 (19.02839494033929, 10)
loss 359.74346923828125
Current State,action,reward,Response time,Next State:  (10, 19.02839494033929) 3 19.0 1387.00434183 (19.286321916040979, 11)
loss 359.3812561035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 359.1384582519531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 358.8179016113281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 359.1827697753906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 359.65863037109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 358.7769470214844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 359.83575439453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 361.00921630859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 359.1968688964844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 359.2737731933594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 359.97540283203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 360.4398193359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 359.05706787109375
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 1 20.0 1258.07554888 (16.237094554670044, 10)
loss 358.8758850097656
Current State,action,reward,Response time,Next State:  (10, 16.237094554670044) 3 19.0 1238.94234737 (15.950694610794756, 11)
loss 358.7874450683594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 360.0262756347656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 358.7126770019531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 358.9906005859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 359.0453796386719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 359.08135986328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 359.53485107421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 359.94549560546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.9018249511719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.993896484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 359.68328857421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 359.0680847167969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 359.9912109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 359.6724548339844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 359.2195129394531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 359.9335021972656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 359.6297912597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 359.1582946777344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 359.9228820800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 358.7521057128906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 359.2944030761719
############ Running episode number: 381  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 359.30084228515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 360.4053649902344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 359.7715759277344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 359.6893005371094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 358.773681640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 360.23480224609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 358.6737365722656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 358.9724426269531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 358.8473205566406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 360.3304748535156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 358.9756774902344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 359.1278076171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 358.83697509765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 359.53692626953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 359.73150634765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 359.2618713378906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 359.0860900878906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 359.3500061035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 358.7729797363281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 360.4377136230469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 359.27691650390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 359.9181823730469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 360.1212158203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 360.13946533203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 359.2645263671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 359.7681884765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 359.5804138183594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 359.43603515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 359.189208984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 359.56927490234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 359.3290710449219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 359.233642578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 359.5525817871094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 359.6953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 359.4259948730469
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 4 19.0 909.008683798 (10.369891240151098, 11)
loss 359.5577392578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 358.74383544921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 359.3856201171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 359.25048828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 359.4635314941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 359.2532043457031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 359.8081359863281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 360.54534912109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 359.41693115234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 360.14398193359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 358.8464050292969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 359.266845703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 359.4443664550781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 359.2398681640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 360.8840026855469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 358.76068115234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 359.68756103515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 358.8069763183594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 358.6217041015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 359.6285095214844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 359.16912841796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 358.8414001464844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 360.7471618652344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 359.9779357910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 358.8896484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 358.74444580078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 359.7872314453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 359.7568054199219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 358.7796936035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 360.1153259277344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 359.14166259765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 360.4544677734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 358.7976989746094
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 2 19.0 1383.38281107 (19.140765783401285, 11)
loss 358.70556640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 359.4039611816406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 359.24822998046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 359.326171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 359.6318054199219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 359.8016357421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 359.5928039550781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 360.1417541503906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 359.394287109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 359.89501953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 359.0914001464844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 359.6126708984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 359.24609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 358.99951171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 360.276123046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 360.55474853515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 359.747802734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.9779968261719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 358.7386169433594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.6980895996094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.6194763183594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 359.0976867675781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 359.08367919921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 359.33795166015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 359.3058166503906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 359.03863525390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 359.17633056640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 358.97808837890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 359.40777587890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 359.600830078125
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 1 20.0 1258.27117243 (17.052961248403161, 10)
loss 359.2397766113281
Current State,action,reward,Response time,Next State:  (10, 17.052961248403161) 3 19.0 1282.21925533 (17.215992726625572, 11)
loss 358.7425842285156
############ Running episode number: 382  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 360.15582275390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 359.00872802734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 359.64422607421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 359.5550842285156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 359.7599182128906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 360.4781799316406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 359.1836242675781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 359.5273132324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 358.95208740234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 361.0594482421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 359.6476135253906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 359.365478515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 360.1778869628906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 360.957763671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 360.39697265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 359.1426696777344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 359.3843078613281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 359.0206298828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 359.25665283203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 358.8175354003906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 359.8674621582031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 359.468505859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 359.559326171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 360.0284729003906
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 0 21.0 917.051082408 (10.344006106602812, 9)
loss 360.0752868652344
Current State,action,reward,Response time,Next State:  (9, 10.344006106602812) 3 20.0 964.575212011 (10.319026962956018, 10)
loss 359.82379150390625
Current State,action,reward,Response time,Next State:  (10, 10.319026962956018) 3 19.0 925.023825574 (10.30224719189987, 11)
loss 359.1776428222656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 359.7473449707031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 359.1818542480469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 359.00775146484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 359.57537841796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 359.1966247558594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 360.2339172363281
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 4 19.0 911.133954163 (10.236991269871366, 11)
loss 359.06158447265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 359.42132568359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 358.6296691894531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 358.9859619140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 359.5823669433594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 359.3274841308594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 359.1595764160156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 359.92633056640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 358.6993103027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 358.9563903808594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 358.8707580566406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 360.5970764160156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 358.8591003417969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 360.2104187011719
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 0 21.0 963.716106332 (11.670334358779868, 9)
loss 359.26678466796875
Current State,action,reward,Response time,Next State:  (9, 11.670334358779868) 3 20.0 1034.00195058 (11.819721938468785, 10)
loss 359.2015380859375
Current State,action,reward,Response time,Next State:  (10, 11.819721938468785) 3 19.0 1004.62682792 (12.19918626616789, 11)
loss 359.5665588378906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 359.9383850097656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 361.3697509765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 359.8672790527344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 360.01434326171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 359.75286865234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 359.3004455566406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 359.61602783203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 360.6436767578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 359.5670166015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 359.9908447265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 360.05926513671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 359.11175537109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 359.00762939453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 360.0689697265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 359.83770751953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 359.556396484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 359.34527587890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 361.1995544433594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 359.1979675292969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 359.6061706542969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 359.6777648925781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 359.7192687988281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 359.96539306640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 358.73492431640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 360.51171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 359.1997375488281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 359.70172119140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 360.1010437011719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 360.58258056640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 360.191650390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 359.885009765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 359.7266540527344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 360.8014221191406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 360.838623046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 358.92437744140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 359.7586364746094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 359.4991149902344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.8223571777344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 360.3007507324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 360.1426086425781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 359.5411071777344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 358.99365234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 359.47869873046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 359.1491394042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 358.83251953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 359.1521911621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 360.8143615722656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 358.7049255371094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 358.83544921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 360.7927551269531
############ Running episode number: 383  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 360.21636962890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 359.4101257324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 359.240966796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 359.3415222167969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 359.55389404296875
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 4 19.0 967.160346038 (11.25610796929319, 11)
loss 359.88287353515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 358.8765869140625
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 2 19.0 950.798097136 (10.995673623987257, 11)
loss 358.80767822265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 358.8796081542969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 359.6123352050781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 358.7018127441406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 359.98876953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 358.4598693847656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.9555358886719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 359.6259460449219
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 0 21.0 927.560809977 (10.552868829802469, 9)
loss 359.0643005371094
Current State,action,reward,Response time,Next State:  (9, 10.552868829802469) 3 20.0 975.508144832 (10.553846649940214, 10)
loss 359.3288269042969
Current State,action,reward,Response time,Next State:  (10, 10.553846649940214) 3 19.0 937.479622653 (10.489125480251131, 11)
loss 360.8009033203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 359.25341796875
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 0 21.0 920.244245637 (10.433149880183072, 9)
loss 358.9791259765625
Current State,action,reward,Response time,Next State:  (9, 10.433149880183072) 3 20.0 969.241448633 (10.44185150623065, 10)
loss 359.56219482421875
Current State,action,reward,Response time,Next State:  (10, 10.44185150623065) 3 19.0 931.538941947 (10.370942817486826, 11)
loss 361.2509460449219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 359.2396545410156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 360.73272705078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 360.4685974121094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 359.4971008300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 360.07183837890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 358.95892333984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 359.2151184082031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 360.74603271484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 360.20562744140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 359.9233703613281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 360.2044372558594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 360.77301025390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 360.9933776855469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 360.1940612792969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 360.1601867675781
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 0 21.0 913.272125304 (10.333617326102203, 9)
loss 359.3094787597656
Current State,action,reward,Response time,Next State:  (9, 10.333617326102203) 3 20.0 964.03141062 (10.390165524255663, 10)
loss 360.0731506347656
Current State,action,reward,Response time,Next State:  (10, 10.390165524255663) 3 19.0 928.797305964 (10.425974763084863, 11)
loss 360.1286315917969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 358.7189025878906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 360.7481994628906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 360.3523254394531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 359.0380554199219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 359.34173583984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 359.5815124511719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 359.5513000488281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 359.5234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 360.61376953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 359.36798095703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 360.7023010253906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 358.891845703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 359.9207763671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 360.875244140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 359.6725158691406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 360.6283874511719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 360.1543273925781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 360.228271484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 358.9249267578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 360.7318115234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 358.7363586425781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 360.67218017578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 360.1886291503906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 360.42742919921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 359.2749328613281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 360.7022705078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 361.92608642578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 359.6502380371094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 359.9947814941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 359.1557922363281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 359.2078857421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 359.7061767578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 358.875732421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 360.8607482910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 358.9666442871094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 359.3701477050781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 360.4942321777344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 358.9457702636719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 359.79656982421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 360.4523010253906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 361.0116882324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 359.03851318359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 360.44671630859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 359.8275451660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 358.9039001464844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 359.5326843261719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 360.6981506347656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 359.5143127441406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 361.35888671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 360.4452819824219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 359.73583984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 361.1926574707031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 359.9200134277344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 359.1024475097656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 361.23095703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 359.640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 358.9831848144531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 361.69927978515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 359.18817138671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 360.631591796875
############ Running episode number: 384  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 359.9516906738281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 360.0565185546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 359.6419677734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 359.9232482910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 360.2281494140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 359.8051452636719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 358.8780212402344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 359.3541259765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 358.66094970703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 361.5247497558594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 359.2299499511719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 358.9909973144531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 359.61102294921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 359.98309326171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 360.44012451171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 361.89532470703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 360.45660400390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 358.58038330078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 358.66839599609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 358.7557067871094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 359.44805908203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 359.8376770019531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 359.89837646484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 360.00042724609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 359.22003173828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 358.9945068359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 361.69671630859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 360.7972106933594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 360.4795837402344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 360.5139465332031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 361.1971130371094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 359.9674072265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 360.3994445800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 358.8892517089844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 358.6032409667969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 359.40203857421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 359.9051208496094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 359.37628173828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 360.9416809082031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 359.9753723144531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 359.54193115234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 360.6226806640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 359.1280212402344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 360.0613098144531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 360.1732482910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 359.0267333984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 358.7872009277344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 359.5719909667969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 359.584228515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 358.94525146484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 359.1087951660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 362.4072265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 360.9150695800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 359.17144775390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 360.9993896484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 360.4501037597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 360.26397705078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 361.4712219238281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 358.74346923828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 359.9190979003906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 361.7009582519531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 360.3699951171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 362.1667175292969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 359.39385986328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 359.9803161621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 358.9292297363281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 360.916748046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 359.9628601074219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 359.2425537109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 359.780517578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 359.5361633300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 361.3757629394531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 358.86102294921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 361.4040832519531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 359.6937561035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 359.3275146484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 359.4469299316406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 359.9548645019531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 361.5054931640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 361.1813659667969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 358.78997802734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 359.76348876953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 359.1335754394531
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 0 21.0 1184.33851965 (15.750501603468638, 9)
loss 359.034423828125
Current State,action,reward,Response time,Next State:  (9, 15.750501603468638) 3 20.0 1247.57857022 (15.817158911312735, 10)
loss 359.75830078125
Current State,action,reward,Response time,Next State:  (10, 15.817158911312735) 3 19.0 1216.66724247 (15.829956988360925, 11)
loss 359.7251892089844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 361.29571533203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 360.1588134765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 359.73651123046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 359.5145568847656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 360.0940246582031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 360.45751953125
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 2 19.0 1219.63501821 (16.147078378791146, 11)
loss 360.23919677734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 361.2997131347656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 359.8773193359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 359.9647521972656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 360.0090026855469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 359.04400634765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 360.4085388183594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 361.1455078125
############ Running episode number: 385  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 361.0901794433594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 359.4951171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 360.9296569824219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 361.1227111816406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 359.8954162597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 359.39599609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 360.2060852050781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 361.098876953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 358.7142028808594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 359.2435607910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 358.65240478515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 359.7388610839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 360.3161926269531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.7158508300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 359.4076232910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 360.5577392578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 362.20062255859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 360.9403381347656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 361.58941650390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 360.14483642578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 359.1023864746094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 360.3868103027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 359.962890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 360.20953369140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 361.9770812988281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 360.3575439453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 359.6499328613281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 359.0111389160156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 360.539306640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 360.4068603515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 360.3348083496094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 359.0719909667969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 359.8623352050781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 359.37738037109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 358.8840026855469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 359.89776611328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 359.5397644042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 359.5707702636719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 360.74969482421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 360.0726318359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 359.4011535644531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 359.9554138183594
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 4 19.0 931.154858096 (10.624473674922116, 11)
loss 358.72210693359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 358.7982482910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 360.3559875488281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 358.7944030761719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 361.93585205078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 361.099365234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 359.8033447265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 360.218017578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 359.9471740722656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 359.6705322265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 359.3888854980469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 359.2708435058594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 361.0068054199219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 361.6471252441406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 359.85198974609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 361.0616455078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 360.69146728515625
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 0 21.0 1259.63387034 (17.534967586021782, 9)
loss 361.4814453125
Current State,action,reward,Response time,Next State:  (9, 17.534967586021782) 3 20.0 1340.98655806 (17.669285735563751, 10)
loss 359.2680358886719
Current State,action,reward,Response time,Next State:  (10, 17.669285735563751) 3 19.0 1314.91162813 (17.944480812078613, 11)
loss 362.0872497558594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 361.91741943359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 360.76068115234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 360.2885437011719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 360.3608093261719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 360.2581481933594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 360.2306823730469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 360.63702392578125
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 4 19.0 1379.54110953 (19.385636054792762, 11)
loss 359.5892333984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 360.16156005859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 359.19757080078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 360.0572814941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 360.6844787597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 361.2548522949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 359.6344299316406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 360.60418701171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 360.20166015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 359.239013671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 360.5665588378906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 360.3480529785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 359.8748779296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 361.307861328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 360.051025390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 359.0311279296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 361.50341796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 358.7870178222656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.7981262207031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 362.1046447753906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 360.3356628417969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 359.98211669921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 360.5413818359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 359.1833190917969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 360.6968078613281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 359.6676025390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 359.6331787109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 359.6881408691406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 360.4803466796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 359.6636962890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 360.1382751464844
############ Running episode number: 386  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 359.00018310546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 358.9816589355469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 360.4299621582031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 359.46527099609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 358.8866882324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 360.0027160644531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 359.1911926269531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 359.7215881347656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 359.8684997558594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 360.3125305175781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 360.1286315917969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 362.60919189453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 358.8283386230469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 359.53204345703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 359.1127624511719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 360.63916015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 359.5110168457031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 359.72003173828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 359.5578308105469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 360.0252685546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 359.34014892578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 360.0050354003906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 359.6648254394531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 359.01092529296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 360.8202209472656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 359.23138427734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 359.3656005859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 359.9736022949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 360.6112060546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 361.2584533691406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 359.8179626464844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 359.92156982421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 359.6409912109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 358.7107849121094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 360.75286865234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 359.19744873046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 360.5102233886719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 359.542724609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 361.40191650390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 359.03009033203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 362.50665283203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 359.7803955078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 359.99273681640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 360.0866394042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 361.74951171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 361.567626953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 360.00433349609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 359.6000061035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 359.170166015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 360.6698913574219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 359.50067138671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 359.980712890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 360.15936279296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 359.8679504394531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 360.4958801269531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 359.7704772949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 360.05633544921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 358.8609924316406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 360.147216796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 358.8255920410156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 361.30352783203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 359.2021789550781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 360.19964599609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 360.1043395996094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 359.34747314453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 360.0396728515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 361.5806579589844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 360.1145324707031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 359.94097900390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 359.25738525390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 360.27105712890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 360.0983581542969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 360.3365478515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 359.4880676269531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 361.698486328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 360.38323974609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 360.7965393066406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 360.5725402832031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 360.51873779296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 361.3551940917969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 360.02752685546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 359.07513427734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 361.3711242675781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 361.8813171386719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 359.64801025390625
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 4 19.0 1203.91462651 (15.829956988360925, 11)
loss 359.1717834472656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 361.0906677246094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 360.5361022949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 359.5986328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 361.4825439453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 360.74200439453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 361.11822509765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 361.0110168457031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 359.8818054199219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 358.67962646484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 360.61004638671875
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 0 21.0 1248.87152463 (16.836383524612351, 9)
loss 363.7259216308594
Current State,action,reward,Response time,Next State:  (9, 16.836383524612351) 3 20.0 1304.41912996 (16.845818065953559, 10)
loss 359.57659912109375
Current State,action,reward,Response time,Next State:  (10, 16.845818065953559) 3 19.0 1271.23153331 (17.052961248403161, 11)
loss 359.388427734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 361.1358947753906
############ Running episode number: 387  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 360.3405456542969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 361.3338317871094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 359.1669616699219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 359.28564453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 359.9427490234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 360.2369689941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 361.7858581542969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 359.9239807128906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 359.0625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 359.62255859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 359.9249572753906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 358.971435546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 359.52398681640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 360.0071105957031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 360.515869140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 359.8671569824219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 359.63458251953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 359.45599365234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 359.4658508300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 361.45623779296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 360.66558837890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 361.49365234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 360.0043640136719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 359.5485534667969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 358.6733703613281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 359.3399658203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 360.0523681640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 359.8865661621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 359.0995788574219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 360.3388671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 361.0498962402344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 360.16802978515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 361.1144714355469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 360.5247802734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 359.0326843261719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 360.2706298828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 360.3535461425781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 359.9107971191406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 359.3106384277344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 359.7904968261719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 359.5998229980469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 360.1769714355469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 360.0431823730469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 360.4890441894531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 360.923828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 360.5851135253906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 359.6140441894531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 359.8527526855469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 361.9621887207031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 360.270263671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 361.1532897949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 360.4027404785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 361.6707763671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 360.7879638671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 360.7506103515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 361.790771484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 360.8447570800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 359.1924133300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 359.8940734863281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 359.6940002441406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 359.57958984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 360.8932189941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 360.5453186035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 359.71856689453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 360.16156005859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 358.9848327636719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 360.848876953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 359.7041320800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 359.0094909667969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 360.56280517578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 359.8041687011719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 361.5900573730469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 360.7638854980469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 358.8536376953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 358.9565734863281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 361.5191345214844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 361.5684814453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 359.0367126464844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 359.4241638183594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 359.3282775878906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 359.5885314941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 361.8297119140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 358.6822509765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 360.1355285644531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 358.9286804199219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 360.6142272949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 360.4013366699219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 359.79986572265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 360.2265930175781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 360.38580322265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 359.3500061035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 360.1712646484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 359.1388244628906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 361.2235107421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 359.68463134765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 360.5340881347656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 363.36572265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 360.7666931152344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 359.0869140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 360.3813781738281
############ Running episode number: 388  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 361.5286560058594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 359.3867492675781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 360.3868713378906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 360.71197509765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 360.14569091796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 358.822021484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 359.62017822265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 359.56512451171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 360.0254211425781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 360.9027404785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 359.802978515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 359.9284362792969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 358.87841796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 362.3157653808594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 359.7244873046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 360.06976318359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 360.0074768066406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 358.99835205078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 358.93377685546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 360.3804016113281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 360.49737548828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 361.1690979003906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 359.26239013671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 361.1806335449219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 360.39532470703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 360.12408447265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 360.7490539550781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 360.40020751953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 361.7285461425781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 361.7938537597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 359.6409606933594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 361.0919494628906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 359.76300048828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 361.10302734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 359.21990966796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 359.2068176269531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 359.1242980957031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 359.8139953613281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 361.216552734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 361.4283752441406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 361.7735595703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 359.5314025878906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 358.55181884765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 361.5400390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 361.01458740234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 359.5374755859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 360.3651428222656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 359.1680603027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 359.1448669433594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 359.04229736328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 360.5904846191406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 360.0657043457031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 359.6041259765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 362.44708251953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 358.8993835449219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 361.5611572265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 359.36260986328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 358.76776123046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 360.2936096191406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 358.8719482421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 360.1758117675781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 361.1809387207031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 360.54510498046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 362.40447998046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 362.1079406738281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 359.8106384277344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 360.3738708496094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 359.30523681640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 362.0816650390625
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 2 19.0 1379.54110953 (19.385636054792762, 11)
loss 359.1932373046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 360.927001953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 360.0444030761719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 358.8161315917969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 359.9236755371094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 359.2841796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 360.099609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 361.5479736328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 361.1247863769531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 359.3377685546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 360.1392822265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 360.40966796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 359.1510925292969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 359.54193115234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 359.88909912109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 359.8501281738281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 359.9239196777344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 359.8485107421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 359.8223876953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 359.679443359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 359.15252685546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 360.5395202636719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 360.0283508300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 360.956298828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 360.0201721191406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 358.9407043457031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 360.3625183105469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 359.47589111328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 360.6554260253906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 359.5747985839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 360.7788391113281
############ Running episode number: 389  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 360.0008239746094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 359.6712951660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 360.7793273925781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 360.1039123535156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 359.5513000488281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 359.97509765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 359.8114013671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 359.1401672363281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 361.0455322265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 358.8361511230469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 360.4620361328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 359.2251892089844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 361.4950866699219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 359.55572509765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 360.8087463378906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 360.97039794921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 360.7603759765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 360.1060485839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 359.21673583984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 360.0082092285156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 360.445068359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 359.86968994140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 360.4654541015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 360.6544494628906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 359.6026916503906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 360.7326354980469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 359.9177551269531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 358.99896240234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 360.2371826171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 360.9422912597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 360.8616027832031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 360.09979248046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 359.3885192871094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 360.1686096191406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 360.44134521484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 360.3626403808594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 359.435546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 359.3048400878906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 360.5369873046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 361.5269470214844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 359.7192687988281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 361.18206787109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 360.9425354003906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 361.0314025878906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 358.93505859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 360.06536865234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 360.78533935546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 359.1077880859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 361.2484130859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 361.6053771972656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 359.62493896484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 359.626220703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 359.85040283203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 361.2391662597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 359.6814880371094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 360.4075622558594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 359.2904357910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 360.0895690917969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 359.7690124511719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 361.08819580078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 360.9316711425781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 359.3820495605469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 360.3418884277344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 359.46875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 360.2215576171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 359.5889892578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 360.2425842285156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 360.2409362792969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 359.32183837890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 360.0791015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 360.0201110839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 361.5433654785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 359.5241394042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 359.1638488769531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 361.81634521484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 360.5031433105469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 360.20660400390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 359.12835693359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 362.9541015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 359.7601013183594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 360.52972412109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 361.07855224609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 360.2522888183594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 359.86163330078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 360.2910461425781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 360.6759033203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 360.1539611816406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 360.19854736328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 359.86309814453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 359.713134765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 358.9344787597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 360.5219421386719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 359.44281005859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 360.8215026855469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 359.7879943847656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 358.7264709472656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 359.2811584472656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 360.4628601074219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 360.2213439941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 360.57763671875
############ Running episode number: 390  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 360.56640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 359.8831481933594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 361.4969177246094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 359.4680480957031
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 0 21.0 974.154538113 (11.336751742492702, 9)
loss 361.4280700683594
Current State,action,reward,Response time,Next State:  (9, 11.336751742492702) 3 20.0 1016.54054685 (11.25610796929319, 10)
loss 360.2839050292969
Current State,action,reward,Response time,Next State:  (10, 11.25610796929319) 3 19.0 974.730436685 (11.027107764209074, 11)
loss 359.3113708496094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 359.3126525878906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 358.99945068359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 359.75616455078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 359.7424621582031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 359.98468017578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 359.1961669921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 359.3257751464844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 360.75201416015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 359.0419006347656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 359.9035339355469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 359.8605651855469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 359.9848937988281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 359.61517333984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 359.3511047363281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 359.8440856933594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 362.28399658203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 359.8399353027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 360.503173828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 358.9590148925781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 361.35467529296875
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 1 20.0 912.494916918 (10.278181486298042, 10)
loss 360.037841796875
Current State,action,reward,Response time,Next State:  (10, 10.278181486298042) 3 19.0 922.857214352 (10.268274366284802, 11)
loss 360.2798156738281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 360.1486511230469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 360.4770812988281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 361.1077880859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 360.4385986328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 360.6011962890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 360.02264404296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 361.7772521972656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 359.6156005859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 359.53466796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 359.0667419433594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 360.8960876464844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 360.2146911621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 359.003662109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 359.5992126464844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 361.0996398925781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 359.9051208496094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 360.650634765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 359.0125732421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 361.0225524902344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 362.2593994140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 361.124267578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 359.1682434082031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 359.08087158203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 360.1568298339844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 361.31109619140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 360.2273254394531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 360.7382507324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 360.04638671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 360.1788330078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 359.5712585449219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 360.9413146972656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 359.0722961425781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 359.3131103515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 360.0650939941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 359.1991271972656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 359.4061279296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 361.9075012207031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 358.5405578613281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 359.36553955078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 360.3335876464844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 360.918701171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 358.8377380371094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 360.2350158691406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 360.3554992675781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 360.9867858886719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 360.12640380859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 358.7451477050781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 360.146240234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 360.1722412109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 360.3360595703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 359.2201232910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 360.6116638183594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 359.8097229003906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 359.5159606933594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 359.6782531738281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 360.0755310058594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.8738708496094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 358.9861755371094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 359.6120910644531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.8646545410156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 360.0031433105469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 359.8280944824219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 360.3327331542969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 359.78216552734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 359.6231994628906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 360.84686279296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 359.2264099121094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 361.7021484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 359.2190856933594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 362.5404052734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 358.88983154296875
############ Running episode number: 391  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 360.64349365234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 359.9715576171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 359.681640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 358.9266662597656
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 1 20.0 974.154538113 (11.336751742492702, 10)
loss 362.75982666015625
Current State,action,reward,Response time,Next State:  (10, 11.336751742492702) 3 19.0 979.00811241 (11.25610796929319, 11)
loss 360.1512451171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 360.0271301269531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 359.39630126953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 359.7815856933594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 360.6424255371094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 359.5838317871094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 360.5904235839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 360.1647644042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 359.7414855957031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 360.5565185546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 359.56341552734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 360.3242492675781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 361.8687438964844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 360.1980285644531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 359.23504638671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 358.7582092285156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 359.02606201171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 360.91033935546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 359.81329345703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 360.9085693359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 360.8307800292969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 359.7367248535156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 360.1051025390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 359.41741943359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 361.04705810546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 361.1619873046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 360.0208435058594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 360.47357177734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 361.4257507324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 358.98504638671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 358.9028625488281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 358.84283447265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 360.2289733886719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 360.7784118652344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 360.48419189453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 358.6181945800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 359.48382568359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 360.33306884765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 359.2948303222656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 359.471923828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 361.4549560546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 360.428955078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 358.55084228515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 361.14501953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 359.74603271484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 360.421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 360.1592102050781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 359.8598327636719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 359.28790283203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 360.3862609863281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 360.1264953613281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 359.3692626953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 360.9007568359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 360.1478576660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 361.1815490722656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 361.4582824707031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 359.4291687011719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 360.3590393066406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 360.4017028808594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 359.8290710449219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 360.8800964355469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 361.29052734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 359.7872009277344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 359.0469665527344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 359.9736022949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 360.1530456542969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 360.3238220214844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 359.733154296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 358.490966796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 359.02178955078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 359.6717529296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 359.5355529785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 359.11480712890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 361.1372375488281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 360.2064514160156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 360.23431396484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 358.6393737792969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 359.12445068359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 359.4278259277344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 360.41204833984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 360.01531982421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 360.2633056640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 359.6111755371094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 360.27484130859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 358.768798828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 359.8309020996094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 360.7501220703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 360.26605224609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 359.380126953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 358.855712890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 360.0583801269531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 358.87286376953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 360.47039794921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 359.17816162109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 360.3127746582031
############ Running episode number: 392  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 359.7906188964844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 360.2638854980469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 359.3079833984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 361.8361511230469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 360.5908203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 362.14288330078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 361.2242431640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 360.13275146484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 361.02276611328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 359.7624816894531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 359.2989196777344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 359.655517578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 359.37432861328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.9276428222656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 358.8328857421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 361.91351318359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 358.4559631347656
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 4 19.0 925.789969445 (10.489125480251131, 11)
loss 358.937255859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 359.5293273925781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 359.5650939941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 360.2474670410156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 359.7864074707031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 360.59637451171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 361.9677734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 358.7843933105469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 362.6780700683594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 358.89239501953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 358.6871643066406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 359.380126953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 359.8363342285156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 358.8695373535156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 359.7418212890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 359.1728820800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 359.4670104980469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 359.27777099609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 360.2266540527344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 359.733154296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 359.2127685546875
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 0 21.0 914.152581784 (10.390165524255663, 9)
loss 359.5843505859375
Current State,action,reward,Response time,Next State:  (9, 10.390165524255663) 3 20.0 966.991429728 (10.425974763084863, 10)
loss 359.9162902832031
Current State,action,reward,Response time,Next State:  (10, 10.425974763084863) 3 19.0 930.696774523 (10.546025383098053, 11)
loss 359.7853698730469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 359.3581237792969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 361.86700439453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 360.8006896972656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 360.43389892578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 359.7664794921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 359.95587158203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 359.96875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 359.07110595703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 360.9886169433594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 359.32000732421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 360.81219482421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 358.8951721191406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 360.8945617675781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 360.5575866699219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 360.3533020019531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 359.7955627441406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 360.49407958984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 359.575927734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 360.5323486328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 359.0826110839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 359.46112060546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 358.74951171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 359.493408203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 361.0189514160156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 358.8162841796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 360.0319519042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 359.5539245605469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 360.0777893066406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 361.6144714355469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 359.12200927734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 359.2903747558594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 361.4136047363281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 360.9696350097656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 359.5744323730469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 359.1435546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 359.6440734863281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 362.96234130859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 360.5589599609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 359.294189453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 359.09613037109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 359.9152526855469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 359.6075744628906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 359.5532531738281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 360.2875061035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 360.6946105957031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 360.2322998046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 360.091064453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 360.18096923828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 361.379150390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 359.98895263671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 360.4412841796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 359.44720458984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 360.4086608886719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 359.4375305175781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 359.349609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 359.2866516113281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 360.0166931152344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 360.6559143066406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 359.0582580566406
############ Running episode number: 393  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 361.65625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 359.4388732910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 360.7333984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 359.365478515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 361.29705810546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 360.9402160644531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 360.1683044433594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 361.42218017578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 360.1977844238281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 358.66973876953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 359.5577087402344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 358.8424072265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 361.26153564453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.8651123046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 360.6513671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 360.1706237792969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 359.41180419921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 360.12554931640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 358.8728332519531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 359.5850524902344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 358.98797607421875
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 1 20.0 919.871906942 (10.370942817486826, 10)
loss 360.3743591308594
Current State,action,reward,Response time,Next State:  (10, 10.370942817486826) 3 19.0 927.777654938 (10.42733414151318, 11)
loss 359.18719482421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 359.7151794433594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 359.9195556640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 361.0768737792969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 359.2486572265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 360.4305725097656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 360.0860900878906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 360.9221496582031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 359.0621337890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 359.07489013671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 359.4918518066406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 358.75274658203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 358.89794921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 359.0313415527344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 361.2097473144531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 359.43634033203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 360.1923522949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 359.96148681640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 359.7032470703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 359.2819519042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 360.3951721191406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 359.22314453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 360.92486572265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 359.9320983886719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 358.8347473144531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 361.48040771484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 360.1825866699219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 358.8718566894531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 359.7801208496094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 360.0810852050781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 359.9909973144531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 359.0003662109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 359.053955078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 360.22943115234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 360.5395202636719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 359.5420227050781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 359.56658935546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 358.9790954589844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 359.9346923828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 360.9591369628906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 359.6559753417969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 359.2682189941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 359.2371826171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 360.27203369140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 360.5946960449219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 359.55682373046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 360.59906005859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 359.9692687988281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 360.5203857421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 359.58892822265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 359.6163635253906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 360.51336669921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 360.6489562988281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 360.6321105957031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 359.50921630859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 359.6311950683594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 360.2295227050781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 359.8399658203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 358.88336181640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 359.41522216796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 359.1803283691406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 359.2069396972656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 358.8234558105469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 359.15008544921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 359.9674072265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.9677429199219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.7554626464844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 361.115966796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 359.9014587402344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 359.5068054199219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 359.19818115234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 359.2733154296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 358.8865051269531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 359.42901611328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 358.6212463378906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 360.369384765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 359.79998779296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 359.3248596191406
############ Running episode number: 394  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 359.6427001953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 360.52215576171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 361.21038818359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 359.39727783203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 359.96588134765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 358.9588317871094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 359.5726013183594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 359.958251953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 361.1040954589844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 359.85284423828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 360.7438049316406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 359.6125183105469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 358.6928405761719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.9542236328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 360.4232482910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 359.64923095703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 359.9103088378906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 358.8174133300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 358.9264831542969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 359.7798156738281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 359.0403747558594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 358.91253662109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 359.3100891113281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 359.5373229980469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 358.8857116699219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 359.1388854980469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 359.5128173828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 359.1062316894531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 360.1562805175781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 359.912353515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 358.785400390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 359.38067626953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 359.72161865234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 360.57806396484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 359.9123229980469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 359.8161315917969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 359.52178955078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 359.92083740234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 359.6519470214844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 358.98089599609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 360.0965270996094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 359.20751953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 358.9200439453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 360.0201721191406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 359.5431213378906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 359.52337646484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 360.40704345703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 359.55999755859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 359.5946044921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 359.0260925292969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 359.71630859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 359.1336975097656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 359.46368408203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 360.55584716796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 358.6949462890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 359.7973937988281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 359.05389404296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 360.4302062988281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 360.0841064453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 358.8413391113281
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 2 19.0 1294.6873044 (17.669285735563751, 11)
loss 359.6383361816406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 359.99615478515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 359.2874755859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 359.2437438964844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 360.9053955078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 358.7740783691406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 359.78179931640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 359.72515869140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 360.19158935546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 359.1326599121094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 360.9982604980469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 359.3262023925781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 359.5564880371094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 359.5302734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 359.74505615234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 359.3934631347656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 358.8534240722656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 359.1318054199219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 359.4356384277344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 359.8027038574219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 361.13677978515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 360.4294738769531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 359.72698974609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 358.95556640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 358.82037353515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 359.42340087890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 360.8006591796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 359.1280822753906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 360.4054260253906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 360.6065979003906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 359.6095275878906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 360.222900390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 359.6906433105469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 360.8096618652344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 360.03851318359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 358.7683410644531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 359.2059020996094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 360.7673034667969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 360.1352233886719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 359.27752685546875
############ Running episode number: 395  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 359.9147644042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 360.59002685546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 360.3064270019531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 358.7440490722656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 358.8402404785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 359.1040344238281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 359.1924743652344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 359.6798095703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 358.99853515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 359.4712219238281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 360.761962890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 359.6268005371094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 359.2853698730469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 359.24652099609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 359.25262451171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 358.9920959472656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 358.74139404296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 358.81597900390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 359.1081237792969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 360.1636657714844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 359.32696533203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 359.78387451171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 359.76104736328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 359.4134521484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 358.7289733886719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 360.0647888183594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 359.7403869628906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 359.9510498046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 359.8387145996094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 358.8749694824219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 361.0047302246094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 360.0198974609375
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 1 20.0 909.642131904 (10.276491935146446, 10)
loss 360.0010070800781
Current State,action,reward,Response time,Next State:  (10, 10.276491935146446) 3 19.0 922.767593645 (10.236991269871366, 11)
loss 358.9360046386719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 359.9263000488281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 358.73663330078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 360.5519104003906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 359.3834533691406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 359.0794677734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 360.65728759765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 359.01666259765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 359.0450134277344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 359.1596374511719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 360.2804260253906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 359.6526794433594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 359.44097900390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 358.7994079589844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 360.316650390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 359.9710693359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 359.01165771484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 361.0740051269531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 359.4096984863281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 358.9419250488281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 360.1041259765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 360.56890869140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 358.9414978027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 359.91534423828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 359.6578674316406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 359.2535400390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 359.8326110839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 359.59796142578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 359.21697998046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 359.7520446777344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 360.8220520019531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 359.7227783203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 359.8730163574219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 359.1329650878906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 358.7958679199219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 358.87518310546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 359.9248962402344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 358.97296142578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 359.3807067871094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 358.877685546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 359.8039245605469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 359.716552734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 358.9333190917969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 359.31427001953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 359.26959228515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 360.1115417480469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 359.7309875488281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 358.9085388183594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 358.5693664550781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 360.4717712402344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 359.55865478515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 359.5200500488281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.8597717285156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 359.3296203613281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 360.24609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.9366455078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 359.2861022949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 360.3558044433594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 359.1678161621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 359.54229736328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 359.11383056640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 360.5495910644531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 358.9985046386719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 359.7049865722656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 359.7804870605469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 358.7884216308594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 359.72515869140625
############ Running episode number: 396  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.82086181640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 359.03936767578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 359.5288391113281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 358.8979797363281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 358.7641296386719
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 2 19.0 967.160346038 (11.25610796929319, 11)
loss 359.8341369628906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 358.9075622558594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 359.18145751953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 358.8080139160156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 358.90545654296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 359.0370178222656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 359.2605895996094
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 2 19.0 937.12351295 (10.772009508959538, 11)
loss 359.69842529296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 362.4205322265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 358.9822082519531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 359.6575622558594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 359.5578918457031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 358.91204833984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 358.9156494140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 359.21527099609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 359.1210632324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 361.2516174316406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 360.1021423339844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 359.09576416015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 359.63043212890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 359.0234680175781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 359.27496337890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 359.9595031738281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 359.47711181640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 358.9897766113281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 359.7298583984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 359.19891357421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 359.433349609375
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 0 21.0 911.133954163 (10.236991269871366, 9)
loss 358.8521728515625
Current State,action,reward,Response time,Next State:  (9, 10.236991269871366) 3 20.0 958.973513426 (10.236272697871373, 10)
loss 358.61883544921875
Current State,action,reward,Response time,Next State:  (10, 10.236272697871373) 3 19.0 920.634200723 (10.369891240151098, 11)
loss 359.8988037109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 361.0312194824219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.8482360839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 360.600341796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 358.7695007324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 361.461669921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 360.0860290527344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 360.23681640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 359.35565185546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 359.6600036621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 359.2176208496094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 359.002685546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 360.19329833984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 358.94873046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 360.1632385253906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 359.03411865234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 358.913330078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 359.8702392578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 359.80743408203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 360.5666809082031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 360.1174621582031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 360.0968322753906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 359.43414306640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 359.7708740234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 359.2920837402344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 359.3018493652344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 360.0226135253906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 360.08648681640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 359.445068359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 359.17803955078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 359.3115234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 360.28656005859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 359.6950378417969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 359.324462890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 358.7922058105469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 359.5042724609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 359.92193603515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 360.70684814453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 358.9334411621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 361.94317626953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 359.96002197265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 358.8341064453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 359.7972717285156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 359.73248291015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 359.8439636230469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 360.9130554199219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 359.989013671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 359.0960693359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 359.5457763671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 360.8639221191406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 359.01300048828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 359.04522705078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 359.0843505859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 359.3450012207031
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 2 19.0 1213.81868812 (16.017694914042416, 11)
loss 358.7942810058594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 358.90814208984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 359.68194580078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 358.9468688964844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 360.1694641113281
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 0 21.0 1225.69057988 (16.295120821876548, 9)
loss 358.8963928222656
Current State,action,reward,Response time,Next State:  (9, 16.295120821876548) 3 20.0 1276.0866986 (16.667936385136993, 10)
loss 359.1213684082031
Current State,action,reward,Response time,Next State:  (10, 16.667936385136993) 3 19.0 1261.79596106 (16.836383524612351, 11)
loss 359.5302429199219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 358.8405456542969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 359.66729736328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 358.80316162109375
############ Running episode number: 397  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.8978576660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 358.6266784667969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 359.2159118652344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 360.2417297363281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 359.8017883300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 358.81121826171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 360.76934814453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 359.2893981933594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 358.783935546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 359.2464904785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 361.423095703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 359.2723693847656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 359.4087829589844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 359.2708435058594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 359.724365234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 360.25030517578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 360.57879638671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 360.95684814453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 358.80291748046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 359.31939697265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 360.86962890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 359.3401794433594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 359.22021484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 360.40234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 358.91632080078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 358.880859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 359.0921325683594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 359.2533874511719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 359.4088134765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 360.08538818359375
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 4 19.0 914.247384359 (10.305649118067803, 11)
loss 359.52850341796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 359.8215026855469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 359.9288330078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 359.1373596191406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 359.5865783691406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 358.91204833984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 359.1440734863281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.9269714355469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 359.1803283691406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 359.19525146484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 361.0705871582031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 359.003662109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 360.0153503417969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 358.9949645996094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 360.6513977050781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 358.91650390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 358.9653015136719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 359.9759216308594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 359.3931884765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 361.13311767578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 359.31494140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 359.80572509765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 359.10064697265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 358.8734436035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 358.78887939453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 359.1458435058594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 359.25006103515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 359.0278015136719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 360.0112609863281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 359.0403747558594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 358.6116943359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 359.69580078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 359.0947265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 359.39990234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 360.1321105957031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 359.94873046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 359.3173828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 358.9814453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 359.587158203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 359.827392578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 359.4072570800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 358.9415283203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 359.9486389160156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 358.7749938964844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 359.13421630859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 359.2824401855469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 358.8536376953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 359.6545104980469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 359.0895080566406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 358.7291259765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 359.14251708984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 359.8104248046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 358.94793701171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 360.12713623046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 360.6665954589844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.9929504394531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 358.9836730957031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 360.4906311035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 359.486328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 358.8945007324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 360.18267822265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 360.34136962890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 359.30157470703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 360.2816467285156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 359.4488220214844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 359.5732116699219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 359.6690673828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 359.5651550292969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 358.92559814453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 359.56512451171875
############ Running episode number: 398  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 359.1227722167969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 360.4048156738281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 361.6967468261719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 359.9393615722656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 359.0819091796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 358.90704345703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 359.2708740234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 359.05499267578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 359.9894714355469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 359.7391662597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 359.39788818359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 359.7107849121094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 358.8327941894531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 359.785400390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 359.26751708984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 359.47662353515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 360.89349365234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 359.97247314453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 359.5525207519531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 358.47796630859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 359.6699523925781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 359.74468994140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 359.36859130859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 359.92962646484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 358.7323303222656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 358.9521179199219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 359.8667907714844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 358.787353515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 359.4629211425781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 358.8057861328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 358.8441162109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 358.67120361328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 359.96392822265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 360.10888671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 359.68585205078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 359.0567932128906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 358.6743469238281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.6419372558594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 360.22369384765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 360.0111999511719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 358.93389892578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 359.2130432128906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 359.31658935546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 360.5614013671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 358.5904846191406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 360.5237121582031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 359.6578369140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 359.6886291503906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 359.20623779296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 361.0158386230469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 359.5699462890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 359.22857666015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 359.54449462890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 359.94921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 360.076171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 359.7950744628906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 359.64044189453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 359.6600036621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 358.9795227050781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 358.8283996582031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 360.0856628417969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 360.7637023925781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 359.80572509765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 359.44775390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 359.69696044921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 359.12158203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 358.7972412109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 358.7364501953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 358.7542419433594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 359.2873840332031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 358.92816162109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 359.5298156738281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 359.53240966796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 360.06317138671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 360.51165771484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 359.1452941894531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 359.1099548339844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 359.27716064453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 360.0447998046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 359.3042907714844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 359.0921325683594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 359.03643798828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 359.8514099121094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 360.0220642089844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 359.0523986816406
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 0 21.0 1203.91462651 (15.829956988360925, 9)
loss 359.08612060546875
Current State,action,reward,Response time,Next State:  (9, 15.829956988360925) 3 20.0 1251.7376675 (15.892373986997768, 10)
loss 358.9005126953125
Current State,action,reward,Response time,Next State:  (10, 15.892373986997768) 3 19.0 1220.65695786 (15.954793861767499, 11)
loss 360.5968322753906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 359.120849609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 359.7519836425781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 360.641845703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 358.5251770019531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 359.03662109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 360.9791259765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 359.8439025878906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 358.9932556152344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 359.7239990234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 359.002685546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 359.81658935546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 360.8717956542969
############ Running episode number: 399  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.8342590332031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 359.2558898925781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 359.2901916503906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 358.8654479980469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 360.28704833984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 358.8852233886719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 359.4835205078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 359.9387512207031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 359.41058349609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 358.80828857421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 359.606689453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 360.4388122558594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 359.248046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 359.6507568359375
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 1 20.0 930.602776506 (10.58735855349979, 10)
loss 360.6942443847656
Current State,action,reward,Response time,Next State:  (10, 10.58735855349979) 3 19.0 939.257231149 (10.552868829802469, 11)
loss 360.9520263671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 359.5226135253906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 359.3649597167969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 360.117431640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 358.7597351074219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 360.1481628417969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 360.295166015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 359.8219299316406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 359.3565368652344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 361.13214111328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 361.9298400878906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 359.4552917480469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 360.0863952636719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 359.1124267578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 360.2402038574219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 359.518798828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 359.2315673828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 359.66156005859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 359.648193359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 359.403564453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 359.7818908691406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 360.45562744140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 359.9658203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 359.2803955078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 360.6159973144531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 360.328369140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 359.115234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 359.9456481933594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 360.42791748046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 359.1305236816406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 359.56671142578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 359.072021484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 359.2815856933594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 358.8212585449219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 359.0340270996094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 360.0954895019531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 359.4882507324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 359.6220703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 358.6298522949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 358.90911865234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 358.8893737792969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 360.189453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 359.2657470703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 359.25
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 358.8437194824219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 360.0951232910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 359.4859313964844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 360.3632507324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 359.6156921386719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 360.4908752441406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 359.56573486328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 361.37542724609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 360.19097900390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 360.6862487792969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 358.9810485839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 359.6265869140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 358.9504699707031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 361.7010192871094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 359.1767272949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 359.39080810546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 359.3652648925781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 361.00946044921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 360.2094421386719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 359.4575500488281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 360.2687072753906
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 2 19.0 1210.97093797 (15.828704162850809, 11)
loss 359.86181640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 359.39276123046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 361.10882568359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 359.2800598144531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 359.3831787109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 359.6104736328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 359.9411315917969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 359.1729431152344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 359.7588195800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 359.5525207519531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 359.436767578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 359.4100341796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 360.6518249511719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 359.6830749511719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 359.4850769042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 359.7204895019531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 358.9621887207031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 359.8700866699219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 359.4776611328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 360.380859375
############ Running episode number: 400  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 359.3580017089844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 360.20745849609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 361.01629638671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 359.039794921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 360.1380920410156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 359.3375244140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 359.6148986816406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 360.1302490234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 361.24395751953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 359.4590148925781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 360.15057373046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 359.01513671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 359.1997985839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 359.12322998046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 359.1087951660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 359.40625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 360.99383544921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 358.92236328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 359.2809753417969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 360.8249206542969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 358.9164733886719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 359.275146484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 360.03253173828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 358.911865234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 358.76007080078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 360.8548583984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 359.9939880371094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 360.0747375488281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 359.822021484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 360.0705261230469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 360.1346740722656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 359.5210266113281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 359.9263916015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 358.8159484863281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 358.8230285644531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 360.015869140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 359.3768005371094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.8491516113281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 360.5967712402344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 359.7305603027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 359.435302734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 359.0634460449219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 358.8549499511719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 359.2259826660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 360.3605041503906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 359.9302062988281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 358.8450927734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 359.1211853027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 359.3460998535156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 360.2919006347656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 359.80975341796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 359.87103271484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 358.9145202636719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 359.6296691894531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 359.295166015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 359.9975280761719
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 1 20.0 1179.43847566 (15.836943704090487, 10)
loss 359.8268737792969
Current State,action,reward,Response time,Next State:  (10, 15.836943704090487) 3 19.0 1217.71670884 (16.466876895473597, 11)
loss 360.8226318359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 361.25933837890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 359.3049011230469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 359.3319091796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 358.86968994140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 359.739501953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 359.05267333984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 358.9339904785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 360.7899475097656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 359.2848815917969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 359.4059753417969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 359.97662353515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 359.1035461425781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 359.79010009765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 359.15252685546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 358.8748779296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 360.1007080078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 359.4063415527344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 359.0093078613281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 360.1230773925781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 358.89544677734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 359.9625549316406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 359.6709899902344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 360.4188232421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 359.2608947753906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 360.5036315917969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 359.85394287109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 360.1090087890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 359.72332763671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 359.4076843261719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.8241882324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 359.61138916015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 359.5052490234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 358.91473388671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 361.4290466308594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 359.7055969238281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 359.04144287109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 358.9859619140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 358.852294921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 359.11480712890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 360.6019287109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 359.76617431640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 359.4158020019531
############ Running episode number: 401  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 359.0321350097656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 359.4849548339844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 358.78411865234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 359.2584228515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 359.2152099609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 360.22332763671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 360.4939880371094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 359.7770080566406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 358.9176940917969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 360.04388427734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 358.9026184082031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 358.8442687988281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 360.49884033203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 360.51190185546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 358.8720703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 359.2211608886719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 360.2501220703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 358.97930908203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 359.5725402832031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 359.135986328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 359.69439697265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 359.2753601074219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 360.2655029296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 359.0945739746094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 358.9814453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 360.953857421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 359.51654052734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 360.57562255859375
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 4 19.0 911.223233653 (10.268274366284802, 11)
loss 359.2326354980469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 359.3509521484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 359.5669860839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 359.0856018066406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 358.8888854980469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 360.6908874511719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 359.2740783691406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 359.10992431640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 361.0982971191406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 359.7895202636719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 359.1470642089844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 359.61224365234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 359.21221923828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 360.58587646484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 359.6906433105469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 359.0187683105469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 359.0233459472656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 361.1582336425781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 359.003173828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 359.7272644042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 359.1496887207031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 360.0600891113281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 359.3013916015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 359.9387512207031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 359.7891540527344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 359.70391845703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 359.2400817871094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 358.9652099609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 358.7202453613281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 360.5126037597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 359.2940979003906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 359.2173767089844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 359.0115661621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 359.23626708984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 359.6394958496094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 360.01898193359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 358.84100341796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 359.1553649902344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 358.8941955566406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 359.8629150390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 358.98529052734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 360.4014587402344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 358.92529296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 359.36834716796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 359.579833984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 359.30596923828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 358.9438781738281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 360.39581298828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 359.5841979980469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 359.3209533691406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 360.205810546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 359.57403564453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 360.08013916015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 358.8957824707031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 359.0540466308594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 359.3833312988281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 359.33935546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 359.7899475097656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 359.4677734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 359.7565612792969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 360.3027038574219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 359.26470947265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 359.2658996582031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 358.7026672363281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 358.88714599609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 358.8494567871094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 359.2840576171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 358.8800354003906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 359.83001708984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 360.54388427734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 359.0584411621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 358.82818603515625
############ Running episode number: 402  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 360.0009460449219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 359.7712097167969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 358.9596252441406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 359.0284118652344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 359.41546630859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 359.3078308105469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 360.25628662109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 358.8400573730469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 358.8421630859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 359.4891052246094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 360.22442626953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 359.8271484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 359.4830627441406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.9976806640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 360.0292053222656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 359.41387939453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 359.89923095703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 359.67218017578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 358.9366149902344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 359.3043518066406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 359.0401306152344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 359.7669982910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 360.82562255859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 358.991455078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 359.0793151855469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 360.5609436035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 359.6067199707031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 360.2548828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 359.1673278808594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 359.9462890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 358.9850769042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 359.1428527832031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 359.5168151855469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 359.0142822265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 359.7287902832031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 360.1213073730469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 359.9825744628906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 360.33111572265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 358.8777160644531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 359.8586730957031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 359.413818359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 358.8248596191406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 359.2113342285156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 359.05810546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 359.4083251953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 359.5400390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 359.9256591796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 359.60357666015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 358.7616882324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 358.9836730957031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 359.1231689453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 358.9132995605469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 358.93292236328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 359.2481994628906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 359.37835693359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 359.65087890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 359.6478576660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 359.29180908203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 360.651123046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 359.3763122558594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 359.19354248046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 359.0145568847656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 360.1163330078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 358.5779724121094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 359.28289794921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 359.28326416015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 359.3909912109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 359.1519470214844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 358.7057800292969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 358.8716735839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 359.2415771484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 360.6666564941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 358.87896728515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 359.285400390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 360.4231872558594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 358.85882568359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 359.6859436035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 358.8704833984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 359.9866027832031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 359.77996826171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 359.6156311035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 359.0523681640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 359.1155700683594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 359.02197265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 360.1335754394531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 360.60302734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 358.9327697753906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 359.50518798828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 359.0604553222656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 358.9925537109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 360.0274658203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 359.13006591796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 358.8981628417969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 359.7200927734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 358.9793701171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 359.4645690917969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 359.123291015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 360.0852355957031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 360.0176086425781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 359.07635498046875
############ Running episode number: 403  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 360.2380676269531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 359.42608642578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 358.98736572265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 359.0203857421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 360.8760070800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 359.8837585449219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 360.09698486328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 361.24896240234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 359.0743713378906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 358.9508972167969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 360.17730712890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 359.3873596191406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 359.79913330078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 359.74847412109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 359.3113098144531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 359.557373046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 359.3674011230469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 360.91900634765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 359.3119201660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 359.060302734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 359.35711669921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 359.3620300292969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 359.3125305175781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 359.7658386230469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 358.7834167480469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 359.1694030761719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 359.2676696777344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 359.8334655761719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 359.3782043457031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 359.5009460449219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 359.82379150390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 359.6734619140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 359.1595153808594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 360.5009765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 360.80865478515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 359.1811828613281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 360.03857421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.9195556640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 358.88665771484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 358.9767150878906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 359.041015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 358.8837585449219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 359.79083251953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 358.7479553222656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 361.1820068359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 360.1524658203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 358.9806213378906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 359.29534912109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 358.89898681640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 359.3699035644531
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 4 19.0 1012.73322757 (12.501496275411796, 11)
loss 358.7484436035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 359.34478759765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 358.9709777832031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 358.7567443847656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 360.2480163574219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 359.83709716796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 359.345458984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 359.696533203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 358.71771240234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 359.4049377441406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 360.519287109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 359.2494201660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 359.86639404296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 360.285400390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 359.2660827636719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 358.5180358886719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 359.0290222167969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 359.40283203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 359.09368896484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 360.7368469238281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 359.6334533691406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 359.01434326171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 360.5429992675781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 359.1541748046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 358.9483947753906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 359.66253662109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 359.2445373535156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 360.2137145996094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 359.1733093261719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 359.640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 359.2137145996094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 360.5794372558594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 358.7370910644531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 359.22650146484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 359.26611328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 359.5021057128906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 358.94342041015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 359.26092529296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 360.5908508300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 359.083984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 359.2132873535156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 358.9923400878906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 360.8232727050781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 359.5539245605469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 358.9350280761719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 358.9599914550781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 358.9855041503906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 360.8346252441406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 359.6540222167969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 359.832763671875
############ Running episode number: 404  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.5257263183594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 359.37298583984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 358.6210021972656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 358.9924621582031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 358.9979553222656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 359.7676086425781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 359.42791748046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 360.095703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 359.1960144042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 359.0673828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 359.00714111328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 359.24468994140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 358.97735595703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.9300231933594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 361.100341796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 358.7868957519531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 359.3138122558594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 359.4345397949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 359.1463928222656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 359.37872314453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 359.0704650878906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 359.4321594238281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 359.7440490722656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 359.8199462890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 358.74591064453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 359.1227111816406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 358.8575439453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 359.29150390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 359.807373046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 358.87322998046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 358.8290100097656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 358.9233093261719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 359.2248840332031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 359.3106689453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 359.3340759277344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 358.9850769042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 360.4576721191406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 359.1373291015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 358.958251953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 359.9669494628906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 359.9320068359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 358.9903564453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 359.0703430175781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 360.1572570800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 359.78582763671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 359.3536376953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 359.20220947265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 359.11053466796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 359.25860595703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 360.15484619140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 359.1250915527344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 359.8196716308594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 359.82244873046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 359.0816650390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 360.3063659667969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 358.9075012207031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 360.4669189453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 358.85882568359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 359.0224914550781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 358.7922058105469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 359.869140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 358.859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 359.2780456542969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 359.4038391113281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 360.5723876953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 359.8966064453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 359.80859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 359.2337646484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 359.21337890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 359.3929748535156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 359.8161926269531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 359.44793701171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 359.1861572265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 359.75787353515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 360.8974609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 359.1069641113281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 359.095703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 359.1492614746094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 360.1600341796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 359.14276123046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 359.1597900390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 360.0205993652344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 358.86309814453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 358.8177490234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 359.05194091796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 359.0229187011719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 360.0829162597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 359.8304138183594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 361.5404357910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 359.8831787109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 359.90863037109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 358.8167724609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 360.2409362792969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 360.483154296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 361.4278564453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 358.83343505859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 359.7703552246094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 359.1216125488281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 359.79083251953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 359.89349365234375
############ Running episode number: 405  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.7882995605469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 359.53509521484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 358.8179931640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 359.0372314453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 359.86639404296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 359.6682434082031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 359.0534362792969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 358.8949279785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 359.7607116699219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 359.01708984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 358.8158264160156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 359.0484313964844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 359.8833923339844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.96832275390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 359.2579345703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 359.21282958984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 358.9891052246094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 360.0390930175781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 359.8457946777344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 359.3121643066406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 359.85931396484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 359.49139404296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 359.7236633300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 359.9814758300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 359.55987548828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 360.05096435546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 358.84796142578125
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 4 19.0 912.494916918 (10.278181486298042, 11)
loss 358.8475341796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 359.31793212890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 359.35546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 359.1285705566406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 358.89825439453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 359.1390075683594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 359.89080810546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 359.2206115722656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 359.83984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 359.3385314941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 359.2119140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 359.7864074707031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 359.3880615234375
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 1 20.0 919.032945938 (10.546025383098053, 10)
loss 358.8367004394531
Current State,action,reward,Response time,Next State:  (10, 10.546025383098053) 3 19.0 937.064750655 (10.655373370049301, 11)
loss 359.28948974609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 359.4076843261719
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 4 19.0 929.522052234 (10.771376986314287, 11)
loss 359.70556640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 359.2229309082031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 360.51263427734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 359.1247863769531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 358.61419677734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 360.1623840332031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 359.13690185546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 359.35089111328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 359.5593566894531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 358.912841796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 358.79742431640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 359.4006042480469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 359.252685546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 359.9017639160156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 360.232177734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 358.71893310546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 359.0185241699219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 359.8438720703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 359.59759521484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 359.1030578613281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 358.89190673828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 359.35284423828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 359.4852294921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 358.6591796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 359.647705078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 360.0199279785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 360.18084716796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 360.8132629394531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 358.98150634765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 358.8078308105469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 360.0096435546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 359.2375183105469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 359.1078796386719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 359.14093017578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 359.1201477050781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.7282409667969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 360.1925048828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 361.1585693359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 359.4790344238281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 359.2668151855469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 360.3805236816406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 359.0910339355469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 359.06451416015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 358.6116027832031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.74237060546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.98248291015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 358.5003967285156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 359.6342468261719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 359.3844909667969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 359.35400390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 358.9236145019531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 360.42864990234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 359.7571716308594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 359.1742248535156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 359.81378173828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 359.04412841796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 359.6533203125
############ Running episode number: 406  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 359.4393615722656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 360.479736328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 358.727294921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 360.4981384277344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 359.3619689941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 359.2778015136719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 360.1562194824219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 359.0238342285156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 358.8434753417969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 359.7679443359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 358.8570861816406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 358.8681640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 359.7425537109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 359.2724914550781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 359.008544921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 359.4744873046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 359.457275390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 360.1003723144531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 359.57745361328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 359.6450500488281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 359.1317138671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 358.8083190917969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 359.57611083984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 359.7974548339844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 359.0579833984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 359.8277893066406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 359.07470703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 360.0172424316406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 358.9757385253906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 359.7882385253906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 360.0091552734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 359.2020263671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 360.0210266113281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 359.16314697265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 360.1845703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 360.22589111328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 359.02386474609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.9705505371094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 358.61798095703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 359.6533508300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 359.9975891113281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 358.8275146484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 359.2227478027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 359.0231018066406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 359.3771667480469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 359.0305480957031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 359.5852966308594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 359.3359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 359.1654052734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 359.3293762207031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 358.79168701171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 359.8089599609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 359.2281188964844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 359.215087890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 359.09600830078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 359.85528564453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 359.551025390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 358.6406555175781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 359.1882019042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 359.2309265136719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 359.0143127441406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 358.8509521484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 359.02447509765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 358.98236083984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 359.77288818359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 359.0408020019531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 358.99554443359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 359.7460632324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 358.9610900878906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 359.3719177246094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 359.7420654296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 360.1571350097656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 359.867919921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 359.4208984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 359.05804443359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 359.6073913574219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 359.65509033203125
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 4 19.0 1278.56065924 (16.84211602880065, 11)
loss 358.91119384765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.9607238769531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 359.11871337890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 359.7950439453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 359.23541259765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 359.02044677734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 360.5509338378906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 358.9566955566406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.89453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 358.9872741699219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 359.5896301269531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 359.9966125488281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 358.9554138183594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 359.96478271484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 359.188232421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 359.4231872558594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 360.0190124511719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 359.0569152832031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 359.718017578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 359.0191955566406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 358.89984130859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 359.1969299316406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 358.9648742675781
############ Running episode number: 407  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 359.6943664550781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 358.7264099121094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 359.83355712890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 359.8576354980469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 359.1344299316406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 359.03094482421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 360.5262145996094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 359.3515930175781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 359.78802490234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 359.58282470703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 358.7599792480469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 359.8539123535156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 359.025146484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.888916015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 358.98931884765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 359.7698059082031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 359.3624572753906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 359.578369140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 358.8603820800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 358.9598693847656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 359.7018127441406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 359.03216552734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 358.8625183105469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 359.2359313964844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 359.4863586425781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 359.30267333984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 359.2772521972656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 358.74462890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 359.2666320800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 358.9062194824219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 359.02545166015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 360.2005615234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 358.5865783691406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 358.8594665527344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 359.5666809082031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 358.8077087402344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 358.9703063964844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 359.0296325683594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 358.885986328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 359.7769775390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 359.4887390136719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 358.98828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 358.8261413574219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 358.76214599609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 359.3642272949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 358.814208984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 358.98388671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 359.35748291015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 359.5029296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 359.7035217285156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 358.9532165527344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 358.8771667480469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 359.3900146484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 359.00567626953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 359.14703369140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 358.62841796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 359.0927734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 359.2502136230469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 359.3287353515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 359.006103515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 359.8254699707031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 360.593505859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 358.8982238769531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 359.5271911621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 358.9585876464844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 359.0318298339844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 358.8785400390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 359.1337585449219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 359.6610107421875
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 0 21.0 1379.54110953 (19.385636054792762, 9)
loss 359.0862121582031
Current State,action,reward,Response time,Next State:  (9, 19.385636054792762) 3 20.0 1437.85991935 (19.223969507401588, 10)
loss 359.0413818359375
Current State,action,reward,Response time,Next State:  (10, 19.223969507401588) 3 19.0 1397.37841716 (19.25591252280865, 11)
loss 360.0683288574219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 359.56640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 359.6209716796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 359.9092712402344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 358.9730224609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 359.1684875488281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 358.90631103515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.9756164550781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 358.99310302734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 359.2232666015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 360.28997802734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 359.1067810058594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 359.0528564453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 359.66925048828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 359.069091796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 359.4140930175781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 359.5892333984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 359.3846130371094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 359.31756591796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 358.84747314453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 359.2798156738281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 358.947509765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 358.8602600097656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 358.82220458984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 359.74102783203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 360.1259460449219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 358.8140869140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 358.7032165527344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 358.8052673339844
############ Running episode number: 408  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 359.3193054199219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 359.4039001464844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 358.6579895019531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 358.9773864746094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 359.34869384765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 360.00677490234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 360.36932373046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 359.7696228027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 358.91229248046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 358.9118957519531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 359.136962890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 359.19964599609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 358.8286437988281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.8354797363281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 359.4935607910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 359.7889709472656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 359.8530578613281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 358.8997497558594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 359.9131774902344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 358.8348388671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 358.96466064453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 359.1813049316406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 359.960693359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 359.3415222167969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 359.1468200683594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 359.04583740234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 358.76123046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 358.7183837890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 359.6366882324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 359.670654296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 359.8572082519531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 359.29638671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 359.1390686035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 360.1656799316406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 359.0874328613281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 358.6446838378906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 358.9014892578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 360.17852783203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 358.8711853027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 360.0763244628906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 359.2508544921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 360.100830078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 358.9556579589844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 359.4315490722656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 359.3045349121094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 359.0335388183594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 358.9118957519531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 359.4569091796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 359.422119140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 359.2652893066406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 359.35699462890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 359.8341979980469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 359.42462158203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 359.23443603515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 359.7016906738281
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 0 21.0 1143.69153516 (15.353965082180355, 9)
loss 358.97967529296875
Current State,action,reward,Response time,Next State:  (9, 15.353965082180355) 3 20.0 1226.82184023 (15.836943704090487, 10)
loss 359.7762145996094
Current State,action,reward,Response time,Next State:  (10, 15.836943704090487) 3 19.0 1217.71670884 (16.466876895473597, 11)
loss 359.6788330078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 359.2133483886719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 358.972412109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 359.17352294921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 358.8677673339844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 359.8453063964844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 359.85736083984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 359.118896484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 359.3739013671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 359.1822509765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 358.8543701171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 359.2901306152344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 359.6658020019531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 359.474609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 359.02825927734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 359.4690246582031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 360.6244812011719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 359.4898376464844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 359.9515380859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 359.171630859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 359.3822326660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 359.9856872558594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 360.0762023925781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 359.1758117675781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 358.9797668457031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 359.04156494140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 359.61395263671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 359.03643798828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 359.2349548339844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 358.8805236816406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.9881591796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 359.5370178222656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 359.06475830078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 358.955078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 359.89935302734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 359.345703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 359.6388244628906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 359.2274475097656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 358.9132080078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 359.762451171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 359.94659423828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 358.7926330566406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 358.7729797363281
############ Running episode number: 409  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.64178466796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 359.5083923339844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 359.4013366699219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 359.4102478027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 359.1395568847656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 359.9535827636719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 358.7830810546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 359.80120849609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 359.8792724609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 359.1263427734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 358.9492492675781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 360.34271240234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 359.2408752441406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 359.591552734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 359.1941223144531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 359.17547607421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 358.86676025390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 360.3259582519531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 359.3451843261719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 358.9022216796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 358.93902587890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 359.821533203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 359.065185546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 358.74078369140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 359.24615478515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 358.66851806640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 358.8515319824219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 359.0898742675781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 359.049560546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 359.3118896484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 359.15155029296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 359.0953674316406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 359.7870178222656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 358.94451904296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 358.65911865234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 359.4720764160156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 359.9602355957031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.93743896484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 359.4429626464844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 360.4022216796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 358.9686279296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 359.095947265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 359.26312255859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 359.0495300292969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 359.22607421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 358.6829833984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 359.224609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 358.9411926269531
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 1 20.0 984.787563682 (11.819721938468785, 10)
loss 360.56005859375
Current State,action,reward,Response time,Next State:  (10, 11.819721938468785) 3 19.0 1004.62682792 (12.19918626616789, 11)
loss 359.33807373046875
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 0 21.0 1012.73322757 (12.501496275411796, 9)
loss 359.7374267578125
Current State,action,reward,Response time,Next State:  (9, 12.501496275411796) 3 20.0 1077.50917513 (13.168618569876575, 10)
loss 359.34930419921875
Current State,action,reward,Response time,Next State:  (10, 13.168618569876575) 3 19.0 1076.17782493 (13.649658108197247, 11)
loss 360.5705261230469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 358.9197998046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 359.02276611328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 359.3518371582031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 359.23748779296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 358.90362548828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 359.1586608886719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 360.1516418457031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 359.8861389160156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 359.9402160644531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 359.3377990722656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 358.9768371582031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 359.66864013671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 359.5675964355469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 359.7571716308594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 359.8367919921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 358.74908447265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 359.24224853515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 358.906005859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 359.3529357910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 359.259521484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 358.87646484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 359.3436584472656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 358.7454528808594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 359.9523010253906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 359.95050048828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 359.9264221191406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 360.300048828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 360.01654052734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 359.6787109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 359.30230712890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 359.8001403808594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 359.6711120605469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 359.06854248046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 359.2121276855469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.9566650390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.8397521972656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 359.7373046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 359.30078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 358.7603759765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 359.58392333984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 358.890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 359.6824951171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 359.2184753417969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 359.7131042480469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 358.9930419921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 360.3504943847656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 358.9537658691406
############ Running episode number: 410  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 359.193115234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 360.1743469238281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 358.74835205078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 359.81292724609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 359.31402587890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 360.1319580078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 359.6403503417969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 359.0733947753906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 358.87335205078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 359.6185607910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 359.1011047363281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 359.2565002441406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 359.36798095703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 359.3208923339844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 359.4273681640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 359.3477478027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 359.1282958984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 359.63092041015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 359.3392333984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 358.9556884765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 359.49072265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 359.28314208984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 359.0399169921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 359.48162841796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 358.9729919433594
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 1 20.0 914.701547126 (10.319026962956018, 10)
loss 358.77691650390625
Current State,action,reward,Response time,Next State:  (10, 10.319026962956018) 3 19.0 925.023825574 (10.30224719189987, 11)
loss 358.7651672363281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 359.47259521484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 358.8990173339844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 359.61126708984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 358.7471008300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 358.9167785644531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 359.9570007324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 359.2892761230469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 358.98388671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 360.68768310546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 359.1359558105469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 359.46319580078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 359.7061462402344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 358.895263671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 359.1517333984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 360.0461730957031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 359.12542724609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 359.4753723144531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 360.2186584472656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 360.5994873046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 358.7762756347656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 359.77288818359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 359.5696716308594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 359.24957275390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 359.0550537109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 359.45172119140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 359.0736083984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 358.7610778808594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 358.9887390136719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 359.0794677734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 359.2223815917969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 358.876220703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 358.72540283203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 358.9445495605469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 359.0720520019531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 359.3086242675781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 359.9482421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 359.0202941894531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 360.02789306640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 359.1175537109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 358.72039794921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 358.80474853515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 359.2297668457031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 358.74530029296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 359.4406433105469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 358.6870422363281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 359.31658935546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 359.17169189453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 358.9006042480469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 358.60650634765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 359.79205322265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 358.8350524902344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 359.46319580078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 358.9549560546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 359.7027282714844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 358.8016052246094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 359.5350341796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 360.682861328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 359.78497314453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.7938537597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 359.1356201171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.8634338378906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 360.09429931640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 360.3617248535156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 360.45526123046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 358.9660949707031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 359.0661315917969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 359.2179870605469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 359.12103271484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 360.0841369628906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 359.0621032714844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 359.4160461425781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 359.36785888671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 359.0069885253906
############ Running episode number: 411  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 359.04583740234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 360.1092224121094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 360.4131774902344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 359.2948303222656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 359.8407897949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 359.415771484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 359.307373046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 360.2840576171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 358.9957580566406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 358.8951416015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 360.2565612792969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 358.697021484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 359.55126953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 360.0892333984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 358.9013366699219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 358.8082275390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 360.65338134765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 359.32720947265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 359.3270568847656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 359.3707580566406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 358.9376220703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 358.8146057128906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 360.56640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 358.75958251953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 358.5720520019531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 358.9358215332031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 359.8570556640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 359.50396728515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 359.2920227050781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 359.4288024902344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 360.0660095214844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 359.4342041015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 359.3753967285156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 360.5693054199219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 359.1038513183594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 359.3583679199219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 359.4338073730469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 360.12066650390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 359.25018310546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 359.9844970703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 360.03948974609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 359.0396423339844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 358.8286437988281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 359.34356689453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 359.55413818359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 358.9695129394531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 359.4620056152344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 358.70587158203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 358.85040283203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 359.75079345703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 359.35614013671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 359.0127258300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 359.0104064941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 358.8036804199219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 359.7786560058594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 358.952392578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 359.62274169921875
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 1 20.0 1204.9600972 (16.466876895473597, 10)
loss 359.2765197753906
Current State,action,reward,Response time,Next State:  (10, 16.466876895473597) 3 19.0 1251.130943 (16.871606159345866, 11)
loss 358.90521240234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 358.6092224121094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 359.64263916015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 358.7272033691406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 359.4412841796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 359.004150390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 359.5236511230469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 359.8067321777344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 359.805908203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 358.91632080078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 358.8177795410156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 360.648681640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 359.7635803222656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 360.8265380859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 358.95208740234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 359.4959411621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 359.3017272949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 360.8825378417969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 358.8849792480469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 358.8801574707031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 359.0380554199219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 359.7644348144531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 359.5330810546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 359.0035400390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 359.0205383300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 359.28057861328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 359.96197509765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 359.7547912597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 359.6135559082031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 359.35736083984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 360.0057678222656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 359.2149658203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 359.35101318359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 359.30059814453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 360.5604248046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 358.67950439453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 359.1644592285156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 359.8105163574219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 359.37298583984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 359.3581848144531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 359.4447937011719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 359.0080871582031
############ Running episode number: 412  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 359.7474060058594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 358.9728088378906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 360.3287353515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 359.7914123535156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 359.2806701660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 359.3472595214844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 358.9752502441406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 358.7625732421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 360.50299072265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 359.04364013671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 360.09173583984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 359.3151550292969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 359.1444396972656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.76885986328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 359.4846496582031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 359.4358215332031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 359.8232421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 358.8978271484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 359.1351013183594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 358.88446044921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 359.19342041015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 360.404541015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 358.8688049316406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 360.0491638183594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 359.30108642578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 361.2950744628906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 359.99005126953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 358.9871826171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 360.2222900390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 358.9245300292969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 359.08050537109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 359.0445861816406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 359.7239685058594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 358.6270446777344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 360.42999267578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 359.0326843261719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 359.0907287597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 360.58447265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 360.5337829589844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 359.20355224609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 358.88232421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 358.7146301269531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 358.9172668457031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 359.679931640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 359.7864074707031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 358.8247375488281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 359.7173767089844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 358.93890380859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 359.8129577636719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 359.0249938964844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 359.71673583984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 359.80419921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 358.97833251953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 359.53179931640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 359.9124450683594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 358.6937561035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 361.3720397949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 360.11590576171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 359.12860107421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 359.2099304199219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 359.07342529296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 359.7560729980469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 359.5898742675781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 359.0404968261719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 359.33380126953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 359.2585754394531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 359.24761962890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 360.0785217285156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 359.475830078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 359.5113830566406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 358.95892333984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 359.4463806152344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 358.9134521484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 360.6941833496094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 358.8988952636719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 358.65435791015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 360.4652099609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 359.3395080566406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 360.5318298339844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 358.96270751953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 358.8832092285156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 359.4435119628906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 359.29449462890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 359.21563720703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 359.8569030761719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 359.2542724609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 359.5957336425781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 359.1812744140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.97528076171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 359.1968688964844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 358.91650390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 359.1546630859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 358.9078674316406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 359.4162292480469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 360.35906982421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 359.3703308105469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 358.9744567871094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 359.40386962890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 359.333740234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 359.7818298339844
############ Running episode number: 413  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.99267578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 359.5636291503906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 359.0370788574219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 359.321044921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 360.1199645996094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 358.5612487792969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 358.7954406738281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 359.3921203613281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 358.9842224121094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 358.9830627441406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 360.0929260253906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 359.8127136230469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 358.9862365722656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 359.2991943359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 359.2415771484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 358.8079833984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 359.0531921386719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 359.2626953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 359.4345397949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 358.63970947265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 360.5526428222656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 360.0951232910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 358.8902282714844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 359.3269958496094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 359.70458984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 359.8370666503906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 360.42156982421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 358.9071044921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 358.7945251464844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 358.8674621582031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 360.0343017578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 358.8600769042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 359.2724914550781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 358.794677734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 358.970458984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 359.3955383300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 359.21331787109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 359.21954345703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 358.7381591796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 359.4748229980469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 359.2882995605469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 359.4367980957031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 360.23193359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 358.9573974609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 359.26666259765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 358.7823791503906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 358.9842834472656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 359.5673522949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 359.39239501953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 358.81732177734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 359.45538330078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 360.0234680175781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 359.87750244140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 359.8907165527344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 359.44061279296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 359.1458435058594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 358.9962158203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 360.4516296386719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 359.2257995605469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 358.964599609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 359.0365295410156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 360.0559387207031
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 1 20.0 1316.32685758 (18.385807405229915, 10)
loss 359.32281494140625
Current State,action,reward,Response time,Next State:  (10, 18.385807405229915) 3 19.0 1352.9188695 (18.671267839956315, 11)
loss 360.3729553222656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 359.0484619140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 358.8930969238281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 360.0910949707031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 359.188720703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 359.16705322265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 361.5087890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 359.4536437988281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 359.41729736328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 359.44854736328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 359.24688720703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 359.95513916015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 359.07330322265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 358.8465881347656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 360.05169677734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.79510498046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 360.6658630371094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 359.5688781738281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 359.3343200683594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 358.6785583496094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 360.63134765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 360.4534606933594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 360.0819396972656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 360.5141296386719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.8179931640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 359.4101867675781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 359.3489074707031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 359.01007080078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 360.342041015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 361.8575744628906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 359.07672119140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 358.81488037109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 359.8561706542969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 359.19989013671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 359.37152099609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 358.8742370605469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 359.5513610839844
############ Running episode number: 414  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 359.0977783203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 359.08123779296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 359.3289794921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 359.2407531738281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 359.2579345703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 360.2856750488281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 360.5721130371094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 360.7003173828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 358.7347717285156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 359.5413818359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 359.4754943847656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 359.05780029296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 359.1247253417969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.9677429199219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 360.0935974121094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 359.5899963378906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 358.92315673828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 359.2330322265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 359.4379577636719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 358.8268127441406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 360.2440490722656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 359.23516845703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 359.9358825683594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 360.03216552734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 359.7349548339844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 359.5173645019531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 359.8017578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 359.2078552246094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 359.3669738769531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 360.02392578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 359.5549011230469
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 4 19.0 912.67468196 (10.24826025489064, 11)
loss 358.9833679199219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 358.5857849121094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 359.2425231933594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 358.94879150390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 358.8301086425781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 359.1784973144531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 359.0857849121094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 359.6539611816406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 360.13043212890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 359.1743469238281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 360.7406921386719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 359.90301513671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 360.0616760253906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 359.28179931640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 359.6994323730469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 358.84478759765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 359.8262023925781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 359.6666259765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 359.1046142578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 359.9052734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 359.2840270996094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 359.6606140136719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 360.356689453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 358.9803466796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 360.671630859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 359.3839416503906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 359.66607666015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 360.1516418457031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 358.7393493652344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 358.8529052734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 359.1039123535156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 359.1662902832031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 358.7491455078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 358.9342346191406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 359.27813720703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 359.3796081542969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 359.24932861328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 359.5450134277344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 360.4081115722656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 360.4721374511719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 359.1095886230469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 359.2113037109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 359.50439453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 359.17431640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 360.0695495605469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 359.1839294433594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 359.7851257324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.9702453613281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 359.0570373535156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 359.18328857421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 358.7195739746094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 359.33563232421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 359.5841369628906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 359.5113525390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 359.6920471191406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 359.6225280761719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 359.236572265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 360.0953674316406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 359.6720275878906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 359.38909912109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 360.19866943359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 358.68658447265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 359.3200988769531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 359.1221618652344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 359.3047180175781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 359.56304931640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 359.4689025878906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 360.0699462890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 358.9744873046875
############ Running episode number: 415  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 359.2571105957031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 359.73602294921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 359.8826904296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 358.90081787109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 359.4239196777344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 360.80340576171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 358.9466857910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 359.73394775390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 359.64715576171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 359.0259094238281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 359.4342346191406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 359.63037109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 359.39093017578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 359.08935546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 360.75189208984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 359.7850341796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 360.2420349121094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 359.281005859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 360.246826171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 359.5132141113281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 358.8900451660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 359.2275085449219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 358.7846984863281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 358.8611145019531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 358.903564453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 359.6965026855469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 359.8155822753906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 359.955810546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 359.29498291015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 359.1009216308594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 359.4360046386719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 359.65716552734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 359.26910400390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 359.1033630371094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 359.58563232421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 359.72186279296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 359.3363037109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 360.29534912109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 359.7174377441406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 360.2651672363281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 359.9970703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 359.6285705566406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 359.6257019042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 359.40936279296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 359.5848693847656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 359.070068359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 359.7596130371094
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 1 20.0 963.716106332 (11.670334358779868, 10)
loss 358.92529296875
Current State,action,reward,Response time,Next State:  (10, 11.670334358779868) 3 19.0 996.702699398 (11.819721938468785, 11)
loss 361.38653564453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 359.4921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 360.44647216796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 359.8778991699219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 358.7852478027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 359.0697021484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 359.0617370605469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 359.3295593261719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 359.31964111328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 359.6143493652344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 360.3191833496094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 360.0187683105469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 359.4687805175781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 359.3158264160156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 358.5711975097656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 358.8775329589844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 358.9867248535156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 359.588623046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 359.8522033691406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 358.929931640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 359.0597229003906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 359.36749267578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 358.96502685546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 360.1542053222656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 359.60150146484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 360.55865478515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 359.223876953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 359.7327880859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 359.2750244140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 359.1452941894531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.95831298828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 359.3717346191406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 358.77716064453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 359.0742492675781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 359.9967041015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 359.5733642578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 360.92364501953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 359.7755432128906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 359.629150390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.9569091796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 359.6078796386719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 358.8982849121094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 358.97186279296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 359.7737121582031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 359.54217529296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 358.9223937988281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 358.95867919921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 359.2727966308594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 359.2691650390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 359.66259765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 358.89306640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 359.2608947753906
############ Running episode number: 416  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 0 22.0 1029.06659875 (11.973514343585284, 8)
loss 358.99468994140625
Current State,action,reward,Response time,Next State:  (8, 11.973514343585284) 3 21.0 1070.83307124 (11.786394321941378, 9)
loss 359.34130859375
Current State,action,reward,Response time,Next State:  (9, 11.786394321941378) 3 20.0 1040.0771169 (11.61852219546234, 10)
loss 360.004150390625
Current State,action,reward,Response time,Next State:  (10, 11.61852219546234) 3 19.0 993.95437024 (11.469111876584304, 11)
loss 359.3922424316406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 359.6424255371094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 359.39923095703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 358.71807861328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 359.4537353515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 358.5115661621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 359.7939453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 359.8199157714844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 360.0423889160156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 360.2076416015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.8936462402344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 359.0383605957031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 358.8455505371094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 358.9820556640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 361.1648254394531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 359.6005554199219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 360.9408874511719
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 4 19.0 919.412094444 (10.44185150623065, 11)
loss 361.2865295410156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 359.53875732421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 359.43121337890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 359.6231689453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 359.2906799316406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 359.8321838378906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 359.39239501953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 359.20458984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 358.82171630859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 359.0677185058594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 358.79248046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 359.0258483886719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 360.2563781738281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 359.37164306640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 358.9161071777344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 358.9190368652344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 358.98541259765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 360.611328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 358.5509948730469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 359.7784423828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 359.7938537597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 359.0238342285156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 359.4520263671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 359.9142761230469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 359.2405700683594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 359.5094909667969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 359.8219909667969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 360.6925354003906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 359.31988525390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 359.65936279296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 359.5578308105469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 359.3061218261719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 358.83245849609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 359.24896240234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 358.9383850097656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 359.6858215332031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 359.40972900390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 359.35308837890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 360.3336486816406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 359.0502624511719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 360.03741455078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 359.07470703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 360.0997619628906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 359.8736267089844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 359.80316162109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 359.1141357421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 359.4509582519531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 358.73846435546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 359.39886474609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 361.16552734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 359.01177978515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 361.281494140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 359.2473449707031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 359.96466064453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 359.3619079589844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 362.0384216308594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 358.9521789550781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 360.14068603515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 360.5061340332031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 360.3594665527344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 358.94586181640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 359.3316345214844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 360.47674560546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 359.2287902832031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 359.35040283203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 359.105712890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 360.08544921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 359.98065185546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 359.0443115234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 361.0151062011719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 358.7524719238281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 359.998779296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 359.9888000488281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 359.56170654296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 360.4407958984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 360.4914245605469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 359.6675720214844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 360.5184326171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 358.92474365234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 359.73065185546875
############ Running episode number: 417  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 359.3995361328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 359.73016357421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 361.1886901855469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 359.73956298828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 360.1043395996094
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 4 19.0 967.160346038 (11.25610796929319, 11)
loss 360.9114685058594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 359.3117980957031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 359.1340637207031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 359.2705078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 359.0968322753906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 361.0934143066406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 360.7498779296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 359.14788818359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 359.0163879394531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 359.8126525878906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 360.6423645019531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 360.84320068359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 358.8662414550781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 360.0438537597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 359.7703857421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 360.0107116699219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 359.6790466308594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 359.5962829589844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 359.057373046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 359.2665710449219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 358.9024963378906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 361.0357360839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 359.1322937011719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 359.444091796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 359.1788635253906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 358.9779052734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 358.8812561035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 360.5261535644531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 358.8030090332031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 359.3404235839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 359.11883544921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 360.5664367675781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 359.5594787597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 358.8367614746094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 359.5886535644531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 359.31976318359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 360.15997314453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 360.3536682128906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 359.4949035644531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 360.73150634765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 359.8067626953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 359.65869140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 359.0146179199219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 359.42816162109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 358.5664367675781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 358.93212890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 361.3078308105469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 359.0734558105469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 359.4282531738281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 358.8965759277344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 359.6708068847656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 360.2416076660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 359.7388916015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 359.724609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 359.0386962890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 359.0730285644531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 358.8692321777344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 360.0677185058594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 358.9245910644531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 359.1444091796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 359.5034484863281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 361.38037109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 359.2462463378906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 358.977294921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 361.86431884765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 359.4559326171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 358.9322814941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 358.9179382324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 358.8304748535156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 359.27001953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 359.267578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 359.4544982910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 359.40740966796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.8082580566406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 359.45428466796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 359.92974853515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 360.34375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 361.26214599609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 361.2282409667969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 359.4559631347656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 360.3028564453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 359.7540588378906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 359.1600646972656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 360.6414794921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 358.7326354980469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 359.9922790527344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 359.8070373535156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 359.8138732910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 361.4624328613281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 359.45794677734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 358.84649658203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 359.2434387207031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 359.26153564453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 359.3062744140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 360.2425842285156
############ Running episode number: 418  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 359.89764404296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 359.0821838378906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 360.3180236816406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 358.7922058105469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 359.0907897949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 359.1487731933594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 359.8645324707031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 359.5133972167969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 360.037353515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 360.0049743652344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 359.8127136230469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 359.9961853027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 359.9359130859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 360.3265075683594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 359.71710205078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 358.8565368652344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 359.0381164550781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 360.5384826660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 359.843994140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 359.3503112792969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 359.15594482421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 359.12420654296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 359.0575866699219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 358.9432067871094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 358.59210205078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 359.6740417480469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 360.88275146484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 359.00811767578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 358.7001953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 359.56494140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 360.8241271972656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 359.6732482910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 359.49920654296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 359.3074951171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 359.87945556640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 358.8946838378906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 359.1082763671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.73809814453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 359.6980285644531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 359.88232421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 359.153564453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 358.8881530761719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 360.2342834472656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 359.19091796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 358.8506774902344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 361.20867919921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 359.4443664550781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 359.55322265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 358.724609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 359.8179016113281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 359.4093322753906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 360.0219421386719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 359.13189697265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 358.7550048828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 359.1418151855469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 359.3607177734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 358.76568603515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 360.8201599121094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 359.1927185058594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 359.0602722167969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 358.8939514160156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 359.24017333984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 360.2334899902344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 359.8864440917969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 359.74896240234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 359.16021728515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 359.30133056640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 358.7621765136719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 359.19927978515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 359.22412109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 359.1976013183594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 358.9709777832031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 358.9709777832031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 360.588134765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 359.13262939453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 358.90283203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 359.23992919921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 359.33612060546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 359.13720703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 359.5808410644531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 358.801513671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 360.5821228027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 361.2118225097656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 358.88885498046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 358.8243713378906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.7289123535156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 360.4783020019531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.8363037109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 361.14874267578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 359.4159240722656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 359.05267333984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 360.15545654296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 359.3937683105469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 358.8173522949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 359.45654296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 359.2006530761719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 359.13421630859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 358.8965759277344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 359.44549560546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 360.98504638671875
############ Running episode number: 419  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 359.02001953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 360.5055236816406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 359.0421447753906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 358.9512023925781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 358.800537109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 359.2649230957031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 358.72845458984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 359.73321533203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 360.102783203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 359.1278991699219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 359.79901123046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 359.13140869140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 359.7289123535156
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 0 21.0 937.318160694 (10.644925616761762, 9)
loss 359.8811950683594
Current State,action,reward,Response time,Next State:  (9, 10.644925616761762) 3 20.0 980.32686333 (10.58735855349979, 10)
loss 359.02264404296875
Current State,action,reward,Response time,Next State:  (10, 10.58735855349979) 3 19.0 939.257231149 (10.552868829802469, 11)
loss 359.68585205078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 359.4761047363281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 359.7684326171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 359.16802978515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 359.53387451171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 358.8680725097656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 358.8821716308594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 359.2470397949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 359.7622375488281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 358.7723388671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 361.4861755371094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 360.6009826660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 360.4241027832031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 359.939208984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 359.4972229003906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 359.6591796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 359.90130615234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 359.32421875
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 4 19.0 911.133954163 (10.236991269871366, 11)
loss 359.0194396972656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 359.0566711425781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 359.3831481933594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 358.74957275390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 361.1745910644531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 359.0955810546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 359.15277099609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 359.83282470703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 358.6889953613281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 359.09326171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 359.0404357910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 359.1925048828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 358.8067626953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 358.79705810546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 359.01092529296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 359.4813232421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 360.0614013671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 359.3739318847656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 359.6605224609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 359.1169128417969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 361.2834167480469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 359.3653564453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 359.0240478515625
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 4 19.0 1179.43847566 (15.836943704090487, 11)
loss 360.55047607421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 359.24505615234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 358.6990051269531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 360.18096923828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 358.9465637207031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 359.14453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 360.156982421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 359.9661560058594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 359.73199462890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 358.91455078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 358.8320007324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 360.9563903808594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 360.1701354980469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 358.90399169921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 359.9070739746094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 359.5937194824219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 359.8197326660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 359.8690490722656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 359.09417724609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 359.78570556640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 359.6876220703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 359.1354675292969
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 4 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.93890380859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 359.75103759765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 359.8697509765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 358.95184326171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 359.63677978515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 359.2446594238281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 360.4371643066406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.93585205078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 360.5391845703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 359.3136901855469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 359.2809753417969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 361.4073486328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 358.7389831542969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 359.89373779296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 358.8917236328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 359.9329833984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 360.1703186035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 359.0614929199219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 359.1403503417969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 358.77960205078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 359.1455993652344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 359.77520751953125
############ Running episode number: 420  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 359.9959411621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 358.8521728515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 359.07232666015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 359.35076904296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 360.1644287109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 359.5543518066406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 359.1497497558594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 360.18804931640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 359.13629150390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 358.8260498046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 359.3636779785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 359.8099670410156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 358.84796142578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.9891662597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 358.8165283203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 360.0479431152344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 360.3746643066406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 359.1854553222656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 359.2437438964844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 358.95806884765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 360.1123962402344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 359.5911560058594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 360.0903015136719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 359.2437744140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 359.3197937011719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 359.48797607421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 358.8094482421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 358.98992919921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 359.80291748046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 359.3575744628906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 358.8932189941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 358.68499755859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 358.7659606933594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 359.2746276855469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 359.05767822265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 360.2397766113281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 358.5529479980469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.8053283691406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 359.01416015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 360.36468505859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 358.81878662109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 360.05657958984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 359.72247314453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 358.790771484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 360.1037292480469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 358.61260986328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 360.8717041015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 359.0542907714844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 358.6700134277344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 359.4627990722656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 359.0890197753906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 359.98016357421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 358.8675842285156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 359.5202331542969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 359.50531005859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 358.6912841796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 360.0106201171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 359.52484130859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 360.5022888183594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 358.808349609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 359.02325439453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 360.3232727050781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 358.7890319824219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 359.4060974121094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 359.94970703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 359.2745056152344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 359.13958740234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 359.0111083984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 358.9378662109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 359.2908020019531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 359.27069091796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 358.871826171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 359.0314636230469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 358.8810119628906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 359.69842529296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 358.8846435546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 359.13421630859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 359.8526306152344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.8037414550781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 359.4251708984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 359.5408020019531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 360.0433654785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 359.55645751953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 359.3307800292969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 359.089111328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.7156066894531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 358.8928527832031
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 1 20.0 1207.88915169 (15.954793861767499, 10)
loss 359.369140625
Current State,action,reward,Response time,Next State:  (10, 15.954793861767499) 3 19.0 1223.96796344 (16.004586266677634, 11)
loss 358.8309020996094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 359.5556335449219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 359.2571716308594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 360.6992492675781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 359.3157958984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 359.1838684082031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 359.3389892578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 359.0249328613281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 360.3265075683594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 359.3312683105469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 359.4063415527344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 358.8777770996094
############ Running episode number: 421  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 359.2335510253906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 358.86651611328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 359.0940856933594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 359.01837158203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 359.68017578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 359.251953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 360.6500549316406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 358.9187927246094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 359.9049987792969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 359.0899353027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 359.3282775878906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 359.0575866699219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 359.38397216796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 359.16046142578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 361.3340759277344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 360.2207946777344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 358.8704833984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 358.832275390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 360.21624755859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 360.234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 358.7593688964844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 358.8503112792969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 359.4081115722656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 359.2881774902344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 358.88262939453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 359.9487609863281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 359.5965576171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 359.29827880859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 359.2629089355469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 358.8209228515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 360.79083251953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 360.21087646484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 359.3219909667969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 358.62689208984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 359.7998962402344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 360.19757080078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 358.9741516113281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 360.4649963378906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 360.694580078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 361.1314392089844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 359.14373779296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 358.8780212402344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 358.99542236328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 358.9014587402344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 359.1082763671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 360.2709655761719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 359.3016357421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 360.06982421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 358.9258117675781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 358.9523010253906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 359.01116943359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 359.8497009277344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 360.3785095214844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 359.9948425292969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 360.7171936035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 359.185302734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 359.6192932128906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 359.115966796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 358.89013671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 358.84271240234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 359.20513916015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 358.6620178222656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 359.164306640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 358.8568420410156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 359.01519775390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 359.233154296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 359.814453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 359.0173645019531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 359.1292724609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 360.1683044433594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 359.0986022949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 358.9416198730469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 359.5097961425781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 359.4018249511719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 358.9980163574219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 358.8440246582031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 359.1166687011719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 359.3558654785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 359.5008239746094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 358.7340087890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 358.8356628417969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 359.1167907714844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 359.35009765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 359.7441711425781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 360.2093200683594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.95562744140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 359.84521484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.8543701171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.91888427734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 359.0179748535156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 358.9903259277344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 358.7856750488281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 359.5181579589844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 359.28533935546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 360.9294738769531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 358.88848876953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 358.8984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 359.7135925292969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 359.123779296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 358.9561767578125
############ Running episode number: 422  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.8201599121094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 359.5239562988281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 359.7554626464844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 359.217041015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 358.80267333984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 359.257080078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 358.8411560058594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 359.6548767089844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 359.2849426269531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 358.8439636230469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 359.18121337890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 359.0684814453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 358.88690185546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 359.0340270996094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 358.9658203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 359.0018005371094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 359.1890563964844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 360.5863952636719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 360.1293029785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 360.3495788574219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 358.9552307128906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 359.1332702636719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 360.0448303222656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 359.9750671386719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 358.86151123046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 359.4881286621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 358.99151611328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 358.9037170410156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 359.0077209472656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 359.3976745605469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 358.86431884765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 358.82598876953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 359.0138244628906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 358.9114074707031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 358.9429626464844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 359.01483154296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 359.3013916015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.6521911621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 359.0422668457031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 359.2909851074219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 359.0799560546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 358.82904052734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 360.1683654785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 359.0812683105469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 359.5596008300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 359.767578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 358.7503967285156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 359.6875305175781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 359.73236083984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 358.72265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 359.23309326171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 359.2399597167969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 361.19451904296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 359.3675842285156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 358.9482116699219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 360.47882080078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 358.9150695800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 359.16619873046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 359.1054992675781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 359.0176696777344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 358.98846435546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 359.2677001953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 358.9072570800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 358.72528076171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 360.1169738769531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 358.9753112792969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 358.9771728515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 359.6756286621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 358.74322509765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 361.419189453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 358.8691101074219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 359.0447692871094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 359.0293273925781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 359.7543640136719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 359.2919921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 359.6999816894531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 359.6301574707031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 359.9652099609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 359.6094055175781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 358.8971862792969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 361.02960205078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 359.2274169921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 359.0829162597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 359.6574401855469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 358.8483581542969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.8622131347656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 358.8789367675781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 359.32891845703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 359.343994140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 359.6300354003906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 359.36871337890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 358.897216796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 360.7464904785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 358.9537048339844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 358.80419921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 358.73583984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 359.6266174316406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 358.75665283203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 359.5107727050781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 358.8145751953125
############ Running episode number: 423  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 359.3696594238281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 358.89013671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 358.99627685546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 360.3558654785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 359.0340576171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 359.54083251953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 359.4942626953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 359.62237548828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 360.1571350097656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 358.9391784667969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 358.93280029296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 359.78277587890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 360.2726135253906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.9695129394531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 358.8118896484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 358.8955993652344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 358.7162170410156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 359.1099548339844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 359.89398193359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 359.1590881347656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 358.9469299316406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 358.8720397949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 360.6590270996094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 358.9014587402344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 358.8616943359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 359.3340759277344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 359.34027099609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 358.7467346191406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 359.1349182128906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 359.3754577636719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 359.04058837890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 359.6682434082031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 358.9819030761719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 359.282958984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 359.0406188964844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 359.2821350097656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 360.2384948730469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.95501708984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 358.8392333984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 358.845947265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 358.9697265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 359.52020263671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 360.1046142578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 359.31622314453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 359.9468078613281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 359.6286315917969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 359.4137268066406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 359.69244384765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 359.62298583984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 359.19036865234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 358.993896484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 358.93121337890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 358.77520751953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 359.302978515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 358.9325256347656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 360.4160461425781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 359.7979431152344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 359.63427734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 358.818603515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 359.6161804199219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 358.8636169433594
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 4 19.0 1301.78496219 (17.944480812078613, 11)
loss 359.6578063964844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 359.3114318847656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 358.8339538574219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 359.88702392578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 359.0479431152344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 359.2701721191406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 360.4364013671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 358.94573974609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 358.9082336425781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 360.2023620605469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 359.068359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 358.8454895019531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 359.3813781738281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 360.154296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 359.8927307128906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 359.7376708984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 359.06646728515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 360.0352783203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 359.2849426269531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 361.591552734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 359.3780822753906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 359.3807373046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 359.515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 360.1677551269531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.71075439453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 360.1372375488281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 360.3592834472656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 360.1684875488281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 358.61285400390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 359.3290100097656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 359.85198974609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 358.83062744140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 359.0520935058594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 359.1411437988281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 358.9226989746094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 359.1177673339844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 358.7937316894531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 358.8515930175781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 358.7825927734375
############ Running episode number: 424  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.8620300292969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 358.9638366699219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 359.9361267089844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 359.9210205078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 359.2942810058594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 359.1142883300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 359.7890930175781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 359.3150329589844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 359.0689697265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 359.02386474609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 359.0279235839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 358.9204406738281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 359.2620849609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 360.2850341796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 359.2665710449219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 360.4249267578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 358.78424072265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 359.66241455078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 358.8038635253906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 359.0647277832031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 359.57275390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 359.0094299316406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 359.62457275390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 358.8394775390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 359.7216491699219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 359.9290466308594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 359.05535888671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 359.790771484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 359.3846740722656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 359.9367980957031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 359.0
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 359.59149169921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 358.9913635253906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 359.346923828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 358.81549072265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 358.8274230957031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 358.79583740234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.7742919921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 360.616943359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 359.6145324707031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 359.48431396484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 358.89508056640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 359.2370300292969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 358.6722717285156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 358.92401123046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 359.0061950683594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 360.21484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 359.6239013671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 359.3157653808594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 358.9486389160156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 359.8464050292969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 359.09259033203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 359.2452697753906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 359.31219482421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 360.117431640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 359.4542236328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 359.32183837890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 359.0157165527344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 358.9915771484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 359.47393798828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 359.21600341796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 360.0770263671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 360.99468994140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 358.95013427734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 359.4295959472656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 358.8679504394531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 360.2445373535156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 358.9306335449219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 358.9410400390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 358.8780822753906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 359.0838317871094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 359.93487548828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 359.5555419921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 358.81268310546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 358.9450378417969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 359.5669250488281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 359.48248291015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 359.0731506347656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.9305419921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 358.90496826171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 360.8006591796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 359.0448303222656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 358.7912292480469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 359.3335876464844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 359.1035461425781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.8454284667969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 358.7695007324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.90582275390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 359.33624267578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 358.947021484375
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 4 19.0 1214.51137704 (15.947547279389703, 11)
loss 358.9674987792969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 358.8892517089844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 359.75299072265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 359.7557067871094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 358.818603515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 358.7581787109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 359.0243225097656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 359.7524719238281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 358.8898010253906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 359.7069396972656
############ Running episode number: 425  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 359.3690490722656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 359.8592834472656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 358.7984313964844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 358.7295837402344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 359.86114501953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 358.8511047363281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 359.05914306640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 359.237548828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 360.27752685546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 360.3990173339844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 358.93017578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 360.521728515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 358.8468322753906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 359.7943115234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 358.8178405761719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 359.1541748046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 358.7998352050781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 359.00201416015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 358.70654296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 359.3386535644531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 359.03607177734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 359.255859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 358.8934020996094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 359.18731689453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 359.419189453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 359.1262512207031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 359.06817626953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 359.077880859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 359.1690979003906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 359.02777099609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 359.00860595703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 360.16033935546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 360.073486328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 359.30572509765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 359.0450134277344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 360.5249328613281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 359.6201477050781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 359.1641845703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 358.8619079589844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 359.7935485839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 359.429931640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 359.114013671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 359.0826416015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 360.3099060058594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 359.1325988769531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 359.6789855957031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 360.3221435546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 359.0687255859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 359.0418395996094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 359.039306640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 359.2862854003906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 359.3401184082031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 358.8105773925781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 358.95758056640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 359.11175537109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 358.7361755371094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 359.1886901855469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 359.3005676269531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 359.7939147949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 358.744873046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 358.7174377441406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 359.14178466796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 359.148681640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 358.7532653808594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 360.6818542480469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 358.89508056640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 360.2438049316406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 358.71185302734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 359.0068359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 359.36639404296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 358.9790954589844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 360.384033203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 359.0567626953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 358.9686279296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 359.7182312011719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 359.4938049316406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 359.31689453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 359.1794128417969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.95458984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 359.0002136230469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 359.5675354003906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 358.8373107910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 358.9241027832031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 358.9690246582031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 358.8616943359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 359.25445556640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 358.7466125488281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 360.4568176269531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 359.24298095703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 359.0125427246094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 360.915771484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 359.09619140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 360.55609130859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 358.9045104980469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 359.0550231933594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 358.92486572265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 358.97052001953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 359.764892578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 358.75994873046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 359.0201416015625
############ Running episode number: 426  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.8507385253906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 360.19940185546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 359.3407287597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 359.3528747558594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 358.6972961425781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 359.2046203613281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 358.85015869140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 359.42889404296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 358.9783935546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 359.1924743652344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 358.7064514160156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 359.1137390136719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 358.94500732421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 359.0675048828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 358.9689636230469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 359.0688781738281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 358.78369140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 358.7771301269531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 359.2484130859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 358.878662109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 359.0404357910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 359.2351379394531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 359.00897216796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 358.9564514160156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 359.02392578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 358.82098388671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 359.63897705078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 359.679443359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 359.0065612792969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 358.8100280761719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 359.27447509765625
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 4 19.0 912.67468196 (10.24826025489064, 11)
loss 358.8533935546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 359.20684814453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 359.111328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 359.276611328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 359.7447814941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 358.9886779785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 359.0812683105469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 358.9141540527344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 358.9729309082031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 359.0172119140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 358.88507080078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 358.9139099121094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 358.7100524902344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 358.97509765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 358.86712646484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 359.0279541015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 358.8326721191406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 359.1643981933594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 359.0371398925781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 358.89752197265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 358.8780517578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 359.14251708984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 358.88677978515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 358.9522399902344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 358.85003662109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 359.00726318359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 360.1618347167969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 358.742919921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 358.88433837890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 358.8020324707031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 359.1810607910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 358.7850646972656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 358.92108154296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 358.89013671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 358.9761047363281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 359.1861877441406
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 1 20.0 1390.09363446 (19.213467265587269, 10)
loss 359.0242004394531
Current State,action,reward,Response time,Next State:  (10, 19.213467265587269) 3 19.0 1396.82133527 (19.140765783401285, 11)
loss 359.54644775390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 358.9750671386719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 358.76544189453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 358.69207763671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 358.94000244140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 358.92919921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 359.83099365234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 359.7613525390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 358.78924560546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 359.12615966796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 359.1007385253906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 359.0775451660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 358.8916931152344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 359.01312255859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 358.9990234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 359.50213623046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 358.8601989746094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 359.1331481933594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 358.855712890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 359.93701171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.82861328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 358.8749084472656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 359.025146484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 358.9842224121094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 359.0805358886719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 358.89593505859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 359.0639953613281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 358.9378662109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 359.5541687011719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 358.74560546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 358.7535400390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 358.84149169921875
############ Running episode number: 427  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 359.351318359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 358.4521179199219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 359.2928161621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 359.0658874511719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 359.1490173339844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 358.8887939453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 359.3966064453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 358.9603576660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 359.2082214355469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 359.48663330078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 359.007568359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 359.015380859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 358.9098205566406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 359.7088928222656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 359.4014587402344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 359.115966796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 359.5916442871094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 358.625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 359.7062683105469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 359.1683044433594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 358.7403564453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 359.10919189453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 359.1369934082031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 359.3802185058594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 358.85638427734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 358.970947265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 358.76055908203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 359.28045654296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 360.07183837890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 359.1301574707031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 358.98638916015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 359.6840515136719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 359.8318176269531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 358.71331787109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 359.06103515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 359.220458984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 359.1612243652344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.87396240234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 359.0416564941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 359.14373779296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 358.8227844238281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 359.14935302734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 358.82537841796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 359.193115234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 358.9948425292969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 359.0078430175781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 359.7163391113281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 359.7171325683594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 359.1997375488281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 359.24462890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 358.9139709472656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 359.1768798828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 358.7292175292969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 358.9808044433594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 358.9564514160156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 358.98834228515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 359.6171569824219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 359.9750061035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 359.3045654296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 358.9647216796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 359.2870788574219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 358.6174621582031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 359.1470642089844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 359.0162658691406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 359.432373046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 358.95855712890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 358.97479248046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 359.1623229980469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 359.0870666503906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 359.1690979003906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 360.0636901855469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 358.8619384765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 358.99615478515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 358.73779296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 359.39910888671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 359.2215576171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 358.79351806640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 359.0614318847656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 359.04638671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 359.21343994140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 358.9145202636719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 358.8857727050781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 359.0369873046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 358.9910888671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 358.8427429199219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 359.0478820800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 359.1006774902344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.9379577636719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.995849609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 358.9244689941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 359.0047607421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 358.7573547363281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 358.8158874511719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 359.33160400390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 359.2392272949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 358.88153076171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 358.95703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 359.01226806640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 359.0127258300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 359.1016845703125
############ Running episode number: 428  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.98236083984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 359.10955810546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 358.8402099609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 358.76806640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 359.120849609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 358.74530029296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 360.0351867675781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 358.8282470703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 359.11444091796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 358.8892822265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 358.7742614746094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 359.0023193359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 358.7791748046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.9142761230469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 359.3087463378906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 359.1275634765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 359.9809875488281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 358.8284912109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 359.0544128417969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 359.1031494140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 358.9863586425781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 358.7633361816406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 358.88287353515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 359.0715637207031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 359.189697265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 358.8310241699219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 359.3851318359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 358.9647521972656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 359.08648681640625
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 0 21.0 910.69972028 (10.335411397720526, 9)
loss 359.3707580566406
Current State,action,reward,Response time,Next State:  (9, 10.335411397720526) 3 20.0 964.125321415 (10.305649118067803, 10)
loss 359.7695617675781
Current State,action,reward,Response time,Next State:  (10, 10.305649118067803) 3 19.0 924.314209939 (10.24826025489064, 11)
loss 358.9731750488281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 359.3608093261719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 358.9606018066406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 359.353271484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 359.2906188964844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 359.60357666015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.6659851074219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 359.1305236816406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 359.0064392089844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 359.4894104003906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 359.4086608886719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 358.8893127441406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 359.009033203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 359.8617858886719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 359.43701171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 358.89678955078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 359.2930603027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 359.150146484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 359.3248596191406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 358.7182312011719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 358.9162292480469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 360.1602783203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 358.64727783203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 359.450927734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 360.2082214355469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 359.0424499511719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 359.6797790527344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 360.69482421875
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 2 19.0 1259.63387034 (17.534967586021782, 11)
loss 358.8857421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 359.070068359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 358.8608093261719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 359.7451477050781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 358.99945068359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 358.909912109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 359.51068115234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 358.6783752441406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 360.0337219238281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 359.0238342285156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 358.9336242675781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 359.0840759277344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 359.34759521484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 359.0686340332031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 358.8968200683594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 359.091064453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 359.338623046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 359.04046630859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 358.8226623535156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 359.67303466796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 359.0058898925781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 358.92510986328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 359.067138671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 359.2193603515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 359.2039794921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 359.19879150390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 359.0334167480469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 359.35809326171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.7201232910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 359.29632568359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 359.0119934082031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 359.8412780761719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 359.2333679199219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 359.0059814453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 358.998779296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 358.9569091796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 358.88104248046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 359.4794006347656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 358.911865234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 358.7432556152344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 359.271484375
############ Running episode number: 429  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 359.0943603515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 358.9735412597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 359.5505676269531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 359.30255126953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 359.3890075683594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 359.8105773925781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 359.0122985839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 359.032470703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 359.1127624511719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 358.8986511230469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 359.57965087890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 359.7278137207031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 359.69482421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 359.65386962890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 359.36138916015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 360.09893798828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 359.0135803222656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 360.3108825683594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 358.7314453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 358.89544677734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 358.90081787109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 358.9901428222656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 359.0000915527344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 359.6136779785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 359.3403625488281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 359.1264953613281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 358.9918518066406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 358.7723693847656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 358.9474182128906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 358.9586486816406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 359.3589782714844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 358.86260986328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 360.02294921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 358.928955078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 358.98193359375
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 4 19.0 909.008683798 (10.369891240151098, 11)
loss 358.8824768066406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 359.3961486816406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 359.2828674316406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 358.90447998046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 359.0325927734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 358.8177185058594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 359.5965881347656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 358.8880310058594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 359.83795166015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 358.9043884277344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 359.05206298828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 358.8062744140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 358.95416259765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 359.0275573730469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 358.73480224609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 358.9190979003906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 359.38525390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 358.97418212890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 359.2771301269531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 359.2430419921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 359.0478515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 359.5160827636719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 358.97198486328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 359.03863525390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 358.7148132324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 359.88861083984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 359.01611328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 359.1004333496094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 359.40020751953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 358.8564453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 359.233642578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 359.0147705078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 358.7474365234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 358.8626708984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 359.0363464355469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 359.52001953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 359.0973205566406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 359.0018310546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 358.92633056640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 359.7679138183594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 358.8839111328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 359.72381591796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 358.7514953613281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 359.65118408203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 358.98675537109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 358.8492126464844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 359.28057861328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 358.7501525878906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 358.9615173339844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 359.9836120605469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.9691162109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 359.0843811035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 360.1266784667969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 359.23394775390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 359.05084228515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 359.07708740234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 359.3068542480469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 359.0787658691406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 359.84716796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 359.0111999511719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 358.9460144042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 358.8039855957031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 359.1621398925781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 359.3800048828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 359.6183166503906
############ Running episode number: 430  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.9674377441406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 359.0124206542969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 359.0794982910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 359.0487976074219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 360.1049499511719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 359.7098083496094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 359.5817565917969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 358.8146667480469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 359.1029357910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 359.0213623046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 358.90960693359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 358.94561767578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 359.04095458984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.73724365234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 359.92340087890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 358.895751953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 359.2083435058594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 359.0830078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 358.6542053222656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 358.7698974609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 359.0922546386719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 358.9956970214844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 359.0631408691406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 359.84881591796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 359.9616394042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 359.4012451171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 359.2580871582031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 359.20660400390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 358.8381042480469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 358.7259826660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 359.1217041015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 359.10504150390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 359.32562255859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 359.073486328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 359.35858154296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 359.6655578613281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 359.0567932128906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.9101867675781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 358.82977294921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 359.0904541015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 359.8416748046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 358.6924743652344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 358.9986877441406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 359.1753234863281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 359.2879943847656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 359.18768310546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 358.98614501953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 359.0121765136719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 358.8892517089844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 358.6604309082031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 359.17681884765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 359.23944091796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 359.0185241699219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 358.924560546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 358.976318359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 358.9361267089844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 359.2123718261719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 359.0224304199219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 359.3265075683594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 359.49444580078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 358.8345947265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 359.9214782714844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 358.7919616699219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 359.3003845214844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 359.67236328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 358.92059326171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 358.7671203613281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 359.2713928222656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 358.77252197265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 358.94561767578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 358.7745361328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 358.77166748046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 358.8319091796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 359.1105651855469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 359.2222595214844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 358.9582824707031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 358.7124938964844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 359.0274963378906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 359.4400634765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 359.0104064941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 359.2616882324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 358.57623291015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 359.18194580078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 359.123779296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 359.0746154785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 359.366455078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 359.0989990234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.7710876464844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.9127502441406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 358.9752502441406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 359.18353271484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 359.1385192871094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 358.875244140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 359.6044921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 359.631591796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 358.6835632324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 359.52606201171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 358.8896484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 359.276611328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 359.0055236816406
############ Running episode number: 431  ##############
Action +2 not possible so Scaled up by 1
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 4 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.907470703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 359.2776794433594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 358.9952087402344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 359.52886962890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 359.1158142089844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 358.9692687988281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 358.9491271972656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 358.8139343261719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 359.0758972167969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 358.8955383300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 359.1630859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 358.95648193359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 359.4543151855469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 359.0363464355469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 359.2115478515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 358.8760070800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 358.7764587402344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 359.60125732421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 358.8500671386719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 358.8294982910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 359.0403137207031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 359.1634521484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 358.96551513671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 358.74591064453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 359.36468505859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 359.821533203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 358.9859619140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 358.81671142578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 359.01861572265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 358.8911437988281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 359.2576599121094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 358.8988037109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 359.7683410644531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 359.09246826171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 358.94659423828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 358.9185485839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 358.99224853515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 359.7605285644531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 359.14495849609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 359.0320129394531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 360.24786376953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 359.89031982421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 358.76715087890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 359.1412658691406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 358.8891296386719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 358.96923828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 359.8123474121094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 359.1453857421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 358.74688720703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 358.9226989746094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 358.96649169921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 359.2037658691406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 359.5719299316406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 358.6734619140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 359.3532409667969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 359.1241455078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 359.04290771484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 358.69451904296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 358.8761901855469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 359.11273193359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 359.0688781738281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 359.09234619140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 358.9805908203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 358.97711181640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 358.9655456542969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 359.3302917480469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 359.0829162597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 359.7351989746094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 358.8653259277344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 359.5763854980469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 358.81451416015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 359.01983642578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 358.8766174316406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 359.0712890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 358.9437561035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 359.41943359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 358.91485595703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 359.0378723144531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.7774658203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 358.9772644042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 359.8186950683594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 359.046630859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 359.2098388671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 358.9845275878906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 359.6092224121094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.9974060058594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 358.8837890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 359.6854553222656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.983154296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 358.8819274902344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 359.0745849609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 359.157470703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 359.1816101074219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 359.0885314941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 359.0644226074219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 358.9264221191406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 359.0628967285156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 359.1719970703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 360.0600891113281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 358.9576721191406
############ Running episode number: 432  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.71478271484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 359.0732727050781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 358.8852233886719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 359.0185546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 358.96484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 358.7347412109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 358.8716125488281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 358.88934326171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 358.8727111816406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 358.7848815917969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 359.0431213378906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 358.8536682128906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 358.642578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 359.4167175292969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 359.7561340332031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 359.1322937011719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 358.9527587890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 358.9546203613281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 359.0258483886719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 358.7216796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 359.1430358886719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 359.387939453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 358.6921081542969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 359.21893310546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 359.0736389160156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 359.0409240722656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 360.4529724121094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 359.75726318359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 359.3543395996094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 358.8860168457031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 359.0360107421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 358.8643798828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 359.1108093261719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 359.2992858886719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 358.84039306640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 359.5300598144531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 358.99530029296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.96441650390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 358.97607421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 358.8291931152344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 359.0675964355469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 359.02215576171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 359.090576171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 359.10321044921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 358.8673095703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 359.2176513671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 359.0570068359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 359.08984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 358.9748840332031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 358.9941711425781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 358.8132629394531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 358.8225402832031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 359.5601501464844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 359.0081481933594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 359.3741149902344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 358.94793701171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 359.6404113769531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 358.8785705566406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 359.9854431152344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 358.9448547363281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 359.3604736328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 359.4107360839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 359.37176513671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 358.7412414550781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 358.9808654785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 359.0943298339844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 358.88299560546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 359.3988952636719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 359.3468322753906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 358.7953186035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 358.9980163574219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 359.09881591796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 359.2768859863281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 358.890625
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 2 19.0 1354.56874896 (18.375894992990247, 11)
loss 360.61065673828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 359.5796203613281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 358.7940979003906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 358.8993835449219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 359.5502014160156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 359.0879211425781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 358.8240661621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 359.0980529785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 359.8502197265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 359.1625061035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 359.2578430175781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.7883605957031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 360.0207824707031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 359.0740966796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 359.4997863769531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 358.7702941894531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 359.0139465332031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 358.9053649902344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 358.8182678222656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 358.92181396484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 358.8010559082031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 359.38677978515625
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 0 21.0 1248.87152463 (16.836383524612351, 9)
loss 358.8321533203125
Current State,action,reward,Response time,Next State:  (9, 16.836383524612351) 3 20.0 1304.41912996 (16.845818065953559, 10)
loss 359.0292053222656
Current State,action,reward,Response time,Next State:  (10, 16.845818065953559) 3 19.0 1271.23153331 (17.052961248403161, 11)
loss 358.83697509765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 359.37200927734375
############ Running episode number: 433  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.92694091796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 358.7933044433594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 358.65362548828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 358.9729919433594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 358.9854431152344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 359.4292297363281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 361.1888427734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 358.9203796386719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 358.78076171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 358.75982666015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 359.5439758300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 358.86053466796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 359.6803283691406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 359.55670166015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 358.7452087402344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 358.9950256347656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 359.12994384765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 359.8908386230469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 358.8230895996094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 360.0065612792969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 358.87420654296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 358.9783935546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 360.1285095214844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 360.1960144042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 359.10546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 358.8300476074219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 358.9625244140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 359.38250732421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 359.1550598144531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 359.41497802734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 359.0017395019531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 359.5710144042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 359.025146484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 359.8463134765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 358.909912109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 358.8780517578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 359.3001403808594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.9117126464844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 359.2467346191406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 359.8800048828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 358.8367919921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 359.0455627441406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 359.5887145996094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 359.8707275390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 358.9879455566406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 358.91314697265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 360.1342468261719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 359.5600891113281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 359.2629089355469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 359.0093994140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 359.7846984863281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 359.2655944824219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 359.8188171386719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 359.8739013671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 358.90509033203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 358.893798828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 359.31329345703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 359.0279541015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 359.1412353515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 358.74822998046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 358.8473205566406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 358.8424072265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 358.7633361816406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 359.105712890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 359.2927551269531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 359.786376953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 358.8538513183594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 359.0917053222656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 359.0115051269531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 360.245361328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 359.8254699707031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 359.1508483886719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 359.88330078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 358.8478088378906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 358.87158203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 358.9671325683594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 359.224609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 358.7373046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.97479248046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 359.2724914550781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 359.0102844238281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 359.38922119140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 359.3983459472656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 359.74359130859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 359.9022216796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.8204345703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 359.0549011230469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 359.6388244628906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 359.8166198730469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 359.11590576171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 359.9007263183594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 358.80889892578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 359.15380859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 358.9149169921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 359.00018310546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 359.2145690917969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 358.8995666503906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 358.9219055175781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 359.81982421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 358.8336181640625
############ Running episode number: 434  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 359.19415283203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 358.7236328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 358.8384094238281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 358.76812744140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 358.9402770996094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 360.13031005859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 358.89007568359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 358.858154296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 359.6474914550781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 358.8830871582031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 359.31964111328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 358.7398986816406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 359.13543701171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 359.6456604003906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 359.1205139160156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 358.98211669921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 359.63995361328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 359.3750915527344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 360.1918640136719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 359.68487548828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 359.7013244628906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 359.0855407714844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 359.099853515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 358.88922119140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 358.9770812988281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 358.9312438964844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 358.8939514160156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 359.18438720703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 360.2547607421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 359.0209655761719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 360.16485595703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 359.1846618652344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 360.23980712890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 358.7822265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 358.7879333496094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 360.1551208496094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 358.876953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.9651794433594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 358.7843322753906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 359.1178894042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 359.8123779296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 358.8914489746094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 358.7990417480469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 358.58966064453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 359.3266906738281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 359.7028503417969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 359.0232238769531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 358.8346862792969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 359.10894775390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 358.89202880859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 359.650634765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 358.9928894042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 359.8929748535156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 358.7869873046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 359.95086669921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 359.0180969238281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 358.9966125488281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 359.1444396972656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 359.2989501953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 359.7756652832031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 359.0340576171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 359.0640869140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 358.9920349121094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 358.9416809082031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 359.7005615234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 359.4485778808594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 358.9874572753906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 360.1532897949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 359.1661682128906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 359.2101745605469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 359.0003967285156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 358.9644470214844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 359.36639404296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 358.9939270019531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 358.8443298339844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 359.244384765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 359.340576171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 358.7912902832031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.77899169921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 359.4254150390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 359.2011413574219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 358.9496765136719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 359.2386474609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 359.1328430175781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 359.99249267578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.6664733886719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 358.90618896484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 359.2607421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.9416809082031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 358.9440002441406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 359.2604675292969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 359.496337890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 359.9165954589844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 359.0515441894531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 358.8936767578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 360.1827392578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 359.0682678222656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 360.43084716796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 358.8195495605469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 359.1119079589844
############ Running episode number: 435  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.9062805175781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 360.2301330566406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 358.8955078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 359.895751953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 359.8052673339844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 358.72705078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 358.8908996582031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 361.096923828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 358.9687805175781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 358.63873291015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 359.09912109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 360.7535095214844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 358.783447265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.9337158203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 358.9200744628906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 359.27679443359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 360.2872009277344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 360.6439208984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 359.07073974609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 359.8356018066406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 358.9579772949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 359.96356201171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 359.2010192871094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 359.3360900878906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 359.21868896484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 358.792724609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 359.9339599609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 359.07958984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 359.1954345703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 358.6542663574219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 359.1649475097656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 358.90020751953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 358.9261779785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 358.95928955078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 359.02325439453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 360.04559326171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 360.753173828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 359.8918762207031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 358.8697814941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 359.4654541015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 359.0799865722656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 358.943603515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 358.96209716796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 358.8128662109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 359.2873229980469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 359.1728515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 359.4635925292969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 359.2304382324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 360.8470764160156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 359.8872985839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 360.0433349609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 358.7062072753906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 359.8871765136719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 359.38934326171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 358.8775634765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 359.26678466796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 358.9938659667969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 359.7142028808594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 359.7764587402344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 358.72625732421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 359.07855224609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 358.9967956542969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 358.8443603515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 359.2283935546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 358.70135498046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 359.0125732421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 359.02874755859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 359.94183349609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 358.8742370605469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 358.6676940917969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 358.8098449707031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 358.8011474609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 359.6983947753906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 359.03668212890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 358.7847595214844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 359.1706848144531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 358.8642272949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 359.0772705078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 359.3678894042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 360.1513366699219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 359.2837829589844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 359.0951232910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 358.68670654296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 358.879638671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 359.876220703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 359.98333740234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 358.985595703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.9982604980469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 359.8019714355469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 358.8145751953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 358.8758544921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 358.7857971191406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 358.8031005859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 358.71124267578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 359.0734558105469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 358.9383544921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 358.91998291015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 359.2518615722656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 360.086669921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 359.00738525390625
############ Running episode number: 436  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 359.8528747558594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 358.9124450683594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 359.0589294433594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 359.1510009765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 359.7621154785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 359.48846435546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 360.0408935546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 358.9631652832031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 359.1573791503906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 359.1094055175781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 358.817138671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 358.9720764160156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 359.3151550292969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 360.0245361328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 359.12261962890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 358.8220520019531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 360.2471618652344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 360.4785461425781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 358.87908935546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 358.9804992675781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 359.3908386230469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 358.7873229980469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 359.8766784667969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 359.2070007324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 359.39801025390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 358.9563293457031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 359.06158447265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 359.016845703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 359.0249938964844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 360.3642883300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 359.37286376953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 359.1683044433594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 358.8097839355469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 358.89453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 358.8033447265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 360.3680725097656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 358.96124267578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.7326965332031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 358.7826843261719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 359.521484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 359.19146728515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 358.7286376953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 359.3646240234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 358.7931823730469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 359.0665588378906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 358.6498107910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 358.9205017089844
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 2 19.0 963.716106332 (11.670334358779868, 11)
loss 358.9032897949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 358.8196716308594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 359.1898498535156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 359.5400390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 359.6666259765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 358.6146240234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 359.30535888671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 358.8536376953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 359.2760009765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 359.6572265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 359.0984191894531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 358.85791015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 358.8597412109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 359.05316162109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 358.9014892578125
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 0 21.0 1316.32685758 (18.385807405229915, 9)
loss 359.0616149902344
Current State,action,reward,Response time,Next State:  (9, 18.385807405229915) 3 20.0 1385.5238237 (18.671267839956315, 10)
loss 358.7955627441406
Current State,action,reward,Response time,Next State:  (10, 18.671267839956315) 3 19.0 1368.06085906 (19.02839494033929, 11)
loss 358.9841003417969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 359.13134765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 359.2569580078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 358.98822021484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 359.0228576660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 358.95477294921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 358.7785339355469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 360.1202697753906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 359.8623352050781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 358.8876953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 359.11053466796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 359.9344787597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 359.05609130859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 359.53619384765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.98565673828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 359.9786071777344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 359.9450988769531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 359.0285339355469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 359.25762939453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 359.8211364746094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 358.96380615234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.83447265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 360.0430603027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 359.251953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 359.24017333984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 359.46392822265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 359.08935546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 359.5676574707031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 358.6973876953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 359.06866455078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 359.1858825683594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 359.94427490234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 359.2598571777344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 359.48419189453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 359.0152893066406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 358.8125915527344
############ Running episode number: 437  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 361.55413818359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 360.0177917480469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 360.46002197265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 359.9055480957031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 359.73785400390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 359.98077392578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 358.9471740722656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 360.52008056640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 359.239990234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 358.7632751464844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 360.0806884765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 359.9091796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 359.9295959472656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.8161315917969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 359.282470703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 358.9073486328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 359.0211181640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 359.1016845703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 359.4294128417969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 358.896728515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 359.0564880371094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 358.7776184082031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 358.7635498046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 358.7900695800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 359.0042724609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 359.6000061035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 359.1090087890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 358.78741455078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 358.8898620605469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 359.0392761230469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 358.92669677734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 359.2328186035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 358.8396911621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 358.7263488769531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 359.01885986328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 358.9330749511719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 358.97467041015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.919189453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 360.7359619140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 359.271728515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 359.20806884765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 358.8017272949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 358.9353942871094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 358.9796447753906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 359.3690185546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 360.11407470703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 359.9464416503906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 358.853271484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 359.7993469238281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 358.7909851074219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 359.349365234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 358.778564453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 360.51422119140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 358.91339111328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 359.2737121582031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 358.78765869140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 359.0334777832031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 358.9496765136719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 359.17822265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 358.907470703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 358.68170166015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 359.721435546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 358.7758483886719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 358.9814453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 359.0079345703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 359.491455078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 359.13946533203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 359.0479736328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 359.15216064453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 359.22833251953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 359.2511291503906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 358.78076171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 359.0985107421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 359.7129821777344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 358.71148681640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 358.84735107421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 359.0401611328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 358.8963623046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.9613037109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 359.68658447265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 359.7996826171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 359.03277587890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 359.72021484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 358.75433349609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 359.8070068359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.9519958496094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 359.70489501953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 359.2681884765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 359.3676452636719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 359.11944580078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 359.6585998535156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 359.7178649902344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 359.7359313964844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 359.0946960449219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 358.9994201660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 360.1296691894531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 359.1434631347656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 358.9348449707031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 359.9313659667969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 358.9401550292969
############ Running episode number: 438  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 359.3607482910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 359.3825378417969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 358.9775085449219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 358.617431640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 359.0562744140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 360.4521789550781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 358.8381042480469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 359.7427673339844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 358.737060546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 359.1354064941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 359.02276611328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 359.4253845214844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 359.10528564453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 359.3691101074219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 358.6723327636719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 358.8851318359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 359.2541809082031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 359.19647216796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 359.7945556640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 358.9620056152344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 359.0641174316406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 359.34552001953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 359.16326904296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 359.3658447265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 359.8254699707031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 359.0096130371094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 359.7477111816406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 358.9276123046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 358.865966796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 358.78619384765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 358.91357421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 359.14984130859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 358.7461853027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 359.10528564453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 359.0205383300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 358.9314880371094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 359.4590759277344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 359.045654296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 358.98321533203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 359.12518310546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 359.159423828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 359.2622375488281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 359.2976379394531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 359.0057373046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 359.9491882324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 359.6903381347656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 358.9975891113281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 358.97393798828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 359.515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 359.08770751953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 359.98565673828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 359.2232971191406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 358.81610107421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 359.4405822753906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 358.8438415527344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 358.9576110839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 358.9620666503906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 358.77490234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 359.398681640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 358.8935241699219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 358.9522399902344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 359.0635070800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 358.96624755859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 359.118896484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 358.81976318359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 358.83026123046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 358.8792419433594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 359.0448303222656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 359.6374816894531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 359.0122375488281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 359.0090026855469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 358.99591064453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 358.88421630859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 358.8883972167969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 359.0992431640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 359.73712158203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 359.12933349609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 359.6651306152344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 359.00616455078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 358.9314270019531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 358.8253173828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 359.0426330566406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 359.775146484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 358.8698425292969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 358.7181701660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 359.255615234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 359.3114929199219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.9911804199219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.9612731933594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 358.7835693359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 358.8343200683594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 359.2757263183594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 359.791748046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 358.9261474609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 358.9851989746094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 358.8390808105469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 359.0115661621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 359.04742431640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 359.89398193359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 359.8616638183594
############ Running episode number: 439  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.8611755371094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 360.588134765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 359.08172607421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 359.83184814453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 358.91400146484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 359.7364807128906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 359.0427551269531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 358.9519348144531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 359.5636291503906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 359.2811279296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 359.2376403808594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 359.26983642578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 359.85296630859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 359.2616271972656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 358.9723205566406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 359.26007080078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 359.19793701171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 358.7710876464844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 358.98553466796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 359.0218811035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 359.03240966796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 359.2522277832031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 358.76458740234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 358.9620056152344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 359.2196044921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 358.9371643066406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 358.6779479980469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 358.9284973144531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 358.8003234863281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 359.6535949707031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 359.20379638671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 359.06378173828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 359.1485595703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 359.8331604003906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 360.0710144042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 359.91046142578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 359.0244445800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 359.2921447753906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 359.29156494140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 359.01263427734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 358.95391845703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 359.0759582519531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 359.62518310546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 360.70135498046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 358.8260192871094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 359.6040954589844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 359.07879638671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 358.9374084472656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 359.05535888671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 359.68658447265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 358.948974609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 358.8836975097656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 358.7574462890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 359.7004699707031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 359.8480224609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 358.8897705078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 359.07769775390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 358.7474365234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 359.1689758300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 359.0595703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 359.0539245605469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 359.84564208984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 358.77587890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 359.0183410644531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 359.01824951171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 359.756591796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 358.9546813964844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 358.7828369140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 358.8510437011719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 359.4252624511719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 358.94342041015625
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 2 19.0 1383.93777195 (19.25591252280865, 11)
loss 358.9996337890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 359.8072509765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 358.93719482421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 359.2229309082031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 358.8734436035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 358.909912109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 358.6630859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 359.6064147949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 358.9638366699219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 358.750244140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 358.95684814453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 358.7633972167969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 359.76739501953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 359.3334045410156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 359.1970520019531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 358.87939453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 359.7652282714844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.962158203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 359.22711181640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 359.2801818847656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 358.9649658203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 360.97955322265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 359.01458740234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 359.1462097167969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 359.2581481933594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 358.890869140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 358.9161376953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 359.12408447265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 359.618896484375
############ Running episode number: 440  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 359.39892578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 358.88433837890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 359.12750244140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 358.9776916503906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 359.0230712890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 359.0357666015625
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 1 20.0 962.898956888 (11.027107764209074, 10)
loss 358.77008056640625
Current State,action,reward,Response time,Next State:  (10, 11.027107764209074) 3 19.0 962.583328739 (10.995673623987257, 11)
loss 358.8961486816406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 359.0411071777344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 359.7944641113281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 359.67559814453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 359.2538757324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 359.6966247558594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.7339782714844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 358.8169860839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 359.0137023925781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 359.94403076171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 359.6109313964844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 359.021728515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 359.7697448730469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 359.9927062988281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 360.0487060546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 358.9105224609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 359.0145568847656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 359.79132080078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 358.9907531738281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 359.72711181640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 359.01263427734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 358.9010009765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 360.141845703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 359.9056701660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 358.9830627441406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 359.03363037109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 358.9816589355469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 359.0436706542969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 359.412109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 358.9712829589844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.9547424316406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 359.163818359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 358.939208984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 359.59344482421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 359.3365173339844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 358.9272766113281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 358.79058837890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 360.0693054199219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 358.9624328613281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 360.2681884765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 358.6383056640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 358.91522216796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 359.17559814453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 359.14984130859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 359.01220703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 358.826416015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 360.003662109375
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 1 20.0 1122.88439768 (14.677479537099185, 10)
loss 359.6234130859375
Current State,action,reward,Response time,Next State:  (10, 14.677479537099185) 3 19.0 1156.21398489 (15.353965082180355, 11)
loss 359.8012390136719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 358.7839050292969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 358.89215087890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 359.063720703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 359.58514404296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 359.86688232421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 359.9258728027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 360.1673583984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 358.98748779296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 359.5060729980469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 359.4120178222656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 359.1751708984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 359.6534423828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 359.5940246582031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 358.8096008300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 359.51446533203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 358.9842224121094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 358.75860595703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 358.82281494140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 358.96099853515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 359.25750732421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 358.99176025390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 359.4918212890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 359.7984619140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 358.89276123046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 359.6676940917969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 358.78509521484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 358.8978271484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 359.7329406738281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 359.1347961425781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.7561950683594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 359.03533935546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 359.32568359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 360.8662109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 359.0717468261719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 358.8612365722656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 359.027587890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 359.74102783203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 359.0086669921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 359.11358642578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 359.1022644042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 358.9617919921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 359.0442199707031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 359.2836608886719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 358.9640197753906
############ Running episode number: 441  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 359.35406494140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 358.7870178222656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 358.78985595703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 359.5059814453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 359.00164794921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 359.7068176269531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 358.5816345214844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 359.0009460449219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 358.92938232421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 358.9538269042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 358.97332763671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 359.15966796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 358.9829406738281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 359.0079040527344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 359.722412109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 358.8131103515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 359.3138122558594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 359.0992126464844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 358.9590759277344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 359.7093811035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 358.82977294921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 359.315673828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 359.0350341796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 360.0091857910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 358.7725524902344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 358.9347839355469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 358.9699401855469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 358.8546142578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 358.735107421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 359.9459533691406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 358.8196105957031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 358.8450012207031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 359.4853210449219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 360.2001647949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 359.6934814453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 358.885498046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 359.05413818359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 359.4471740722656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 359.9697265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 358.9880676269531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 358.9796142578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 358.8143615722656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 358.9277038574219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 358.9835510253906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 358.61383056640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 358.9980773925781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 358.8396911621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 358.8762512207031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 358.7442932128906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 358.8228759765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 358.7752990722656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 358.93463134765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 359.4085388183594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 358.93463134765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 359.0247497558594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 358.93145751953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 359.768798828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 358.6551818847656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 359.5863342285156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 359.28045654296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 359.32965087890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 359.8385009765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 359.22259521484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 359.1322937011719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 358.7685546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 360.31988525390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 358.9374084472656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 359.131591796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 359.87408447265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 359.38592529296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 360.4934387207031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 359.2230224609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 359.1481018066406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 359.6640319824219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 358.79571533203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 358.96295166015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 358.973876953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 359.43035888671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.7976379394531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 358.7876892089844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 359.8851318359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 360.1751403808594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 358.8794250488281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 359.4630126953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 359.7857971191406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 360.64984130859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 359.4745788574219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.7660217285156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 359.06884765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 358.7170715332031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 359.98895263671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 359.2059020996094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 358.8838195800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 358.8084411621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 358.9469909667969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 358.85675048828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 359.7915344238281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 358.7962341308594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 359.7065734863281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 359.0950927734375
############ Running episode number: 442  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.8970031738281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 359.83441162109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 359.88629150390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 358.8088684082031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 359.31329345703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 359.4192199707031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 359.17236328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 359.3280944824219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 359.4560241699219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 359.2776794433594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 358.64691162109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 359.63848876953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 359.070556640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.8312072753906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 359.93267822265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 358.90472412109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 359.86895751953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 359.0435485839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 358.8838195800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 359.0343933105469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 359.2745666503906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 358.9114685058594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 359.3390808105469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 358.9786071777344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 360.3448486328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 359.0116271972656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 359.0144958496094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 358.9132080078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 358.88873291015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 359.0415954589844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 358.8701477050781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 358.9896240234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 358.7413635253906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 359.9127197265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 359.08624267578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 359.00872802734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 359.0789794921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.97967529296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 359.00537109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 359.5450134277344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 358.9342041015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 358.83062744140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 358.93804931640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 359.08544921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 359.4949035644531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 359.0173645019531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 358.8583679199219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 358.8724670410156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 358.6986083984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 359.3280029296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 358.8887023925781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 358.789306640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 360.0662841796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 360.314697265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 359.5166015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 358.7398681640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 359.6626281738281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 358.9950866699219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 359.8896179199219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 358.97412109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 359.6166687011719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 358.8432922363281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 358.9166564941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 359.0237121582031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 359.0347900390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 358.9549560546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 359.2043151855469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 358.9067687988281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 359.0361633300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 359.1708984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 359.7944030761719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 358.73956298828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 359.1940612792969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 359.2563781738281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 359.59796142578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 359.0579528808594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 358.6459045410156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 359.1363220214844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 359.2372741699219
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 1 20.0 1226.10492247 (15.950694610794756, 10)
loss 360.19305419921875
Current State,action,reward,Response time,Next State:  (10, 15.950694610794756) 3 19.0 1223.7505224 (15.828704162850809, 11)
loss 359.411376953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 358.89019775390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 360.06634521484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 358.740478515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 360.2320251464844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.9490966796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 359.24530029296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 359.80322265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 359.2768249511719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 359.243408203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 358.9864807128906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 359.02899169921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 360.1577453613281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 358.69287109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 359.1612243652344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 358.7923278808594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 359.0223388671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 358.8803405761719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 359.0001220703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 359.27923583984375
############ Running episode number: 443  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.9209289550781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 359.0225524902344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 360.00885009765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 359.42108154296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 359.31439208984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 358.7984924316406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 360.05352783203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 358.99407958984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 359.4909973144531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 359.82891845703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 359.4189147949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 359.11614990234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 359.1319274902344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 359.0499572753906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 359.45257568359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 358.726806640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 359.7673034667969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 359.3703308105469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 358.98565673828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 359.07086181640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 359.35546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 359.00067138671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 358.8475036621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 359.7207336425781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 359.1158142089844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 359.0118408203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 359.352294921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 360.57940673828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 359.0782775878906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 358.8920593261719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 359.7711486816406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 358.69451904296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 359.2831726074219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 358.8512878417969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 359.4783630371094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 359.0684814453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 359.7734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.78277587890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 359.40118408203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 359.9112548828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 358.9249572753906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 359.25311279296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 359.32623291015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 358.8412170410156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 358.9698791503906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 358.8480529785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 359.1288146972656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 358.9813537597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 359.0938415527344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 358.955810546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 358.772705078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 359.70684814453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 359.1482238769531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 358.8050231933594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 359.78118896484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 358.7742004394531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 359.4231872558594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 358.7557373046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 359.05181884765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 359.53765869140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 358.7763366699219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 359.0520324707031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 359.88446044921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 358.8180847167969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 359.124755859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 359.0108947753906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 359.2446594238281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 358.931640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 359.4143981933594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 358.9895324707031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 359.2694396972656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 359.5046081542969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 359.1625061035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 360.30340576171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 359.0780944824219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 359.8948974609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 358.8965148925781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 358.97607421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.9266357421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 358.9109191894531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 359.1429748535156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 359.14862060546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 359.6247253417969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 359.77801513671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 359.5931701660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.8408203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 358.9432373046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 359.7083740234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.8959045410156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 359.3777770996094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 359.2848205566406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 359.35748291015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 358.7987060546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 358.9296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 358.7782287597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 359.2872314453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 358.83453369140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 359.5622863769531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 360.0141296386719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 359.21820068359375
############ Running episode number: 444  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.95361328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 359.7159423828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 359.87945556640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 359.3701171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 359.9839782714844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 358.9505310058594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 358.9872131347656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 359.001953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 359.0587158203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 359.1054992675781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 358.8620300292969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 359.206298828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 359.0621337890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.83868408203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 358.9722900390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 358.88232421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 358.97021484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 359.0965270996094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 358.9471435546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 359.80029296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 359.2512512207031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 359.23260498046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 359.6529541015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 359.0381774902344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 358.8858642578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 359.2640075683594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 358.86102294921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 359.6976013183594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 358.7786865234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 358.7215576171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 358.8921203613281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 359.3829650878906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 358.8736572265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 358.75201416015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 358.9964904785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 359.9112548828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 359.1724548339844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.9749755859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 359.0140075683594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 359.08099365234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 359.3515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 360.15802001953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 359.2270812988281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 358.97479248046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 359.0691833496094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 358.93743896484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 359.7898864746094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 358.8752746582031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 358.84332275390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 359.1146240234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 359.081298828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 359.4856872558594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 358.9592590332031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 359.2435302734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 359.0181579589844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 358.7331848144531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 358.9556884765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 358.9057922363281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 359.36968994140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 359.12493896484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 359.25701904296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 358.97216796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 358.767822265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 359.0548400878906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 359.1809387207031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 358.9582824707031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 359.16314697265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 358.82330322265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 360.16497802734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 358.9609069824219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 358.9448547363281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 360.3606262207031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 359.7703857421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 359.33551025390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 359.02752685546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 358.85479736328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 358.92877197265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 359.3905944824219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.92718505859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 358.9792175292969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 359.1072998046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 358.8146057128906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 358.998046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 359.63494873046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 358.745849609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 359.18975830078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 359.0859680175781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.7467346191406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.9105224609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 359.42840576171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 358.9693298339844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 358.857177734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 358.9759826660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 359.0361633300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 359.7303771972656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 359.8856506347656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 358.9952392578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 359.38641357421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 358.7563781738281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 359.39398193359375
############ Running episode number: 445  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 359.0337219238281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 358.9944763183594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 358.9043884277344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 360.1362609863281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 359.26605224609375
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 0 21.0 967.160346038 (11.25610796929319, 9)
loss 358.9235534667969
Current State,action,reward,Response time,Next State:  (9, 11.25610796929319) 3 20.0 1012.3192433 (11.027107764209074, 10)
loss 359.892822265625
Current State,action,reward,Response time,Next State:  (10, 11.027107764209074) 3 19.0 962.583328739 (10.995673623987257, 11)
loss 358.94427490234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 359.7617492675781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 359.0195007324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 359.6859436035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 358.6961364746094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 358.86328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.9571838378906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 359.48211669921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 358.6385498046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 358.9374084472656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 359.7762451171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 358.8033447265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 359.0743408203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 358.8761901855469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 359.1144714355469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 358.8375244140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 359.082275390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 358.9150390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 359.7556457519531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 359.24053955078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 358.95458984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 359.2401123046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 359.63360595703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 358.85882568359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 359.227783203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 359.12054443359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 358.99114990234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 359.119140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 359.3356018066406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 359.27288818359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.7936096191406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 359.0928039550781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 359.2243957519531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 360.01129150390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 358.6258850097656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 358.9222106933594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 359.19384765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 359.4858703613281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 359.0163879394531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 359.204345703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 358.8470764160156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 359.7862854003906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 359.0367126464844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 360.3204650878906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 358.8366394042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 359.0055847167969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 358.9639587402344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 358.9559020996094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 358.72808837890625
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 4 19.0 1179.43847566 (15.836943704090487, 11)
loss 358.90606689453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 359.2626647949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 360.479736328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 359.3699645996094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 358.8020935058594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 359.4118347167969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 359.6974792480469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 359.1391906738281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 358.74365234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 359.9882507324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 359.2012634277344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 358.81365966796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 359.52978515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 358.8819885253906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 359.78924560546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 359.4169921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 358.66461181640625
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 1 20.0 1376.52055872 (18.668181536495972, 10)
loss 358.94891357421875
Current State,action,reward,Response time,Next State:  (10, 18.668181536495972) 3 19.0 1367.89714889 (18.375894992990247, 11)
loss 359.4774475097656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 360.7283935546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 359.2705993652344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 359.1872863769531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 359.75213623046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 360.51226806640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 359.2089538574219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 358.9190368652344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 359.7145080566406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 358.9984436035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 359.48602294921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 359.5309753417969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 359.4254455566406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 359.5216064453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.7485656738281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 359.9988098144531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 359.2071838378906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 359.6266174316406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 359.1779479980469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 359.8002014160156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 359.85211181640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 358.9549560546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 359.2059020996094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 359.14398193359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 358.8531494140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 359.2871398925781
############ Running episode number: 446  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 359.7322082519531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 360.255615234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 359.61077880859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 359.2285461425781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 358.93511962890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 358.81951904296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 359.2671813964844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 359.4356689453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 359.0328063964844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 359.66241455078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 359.2008361816406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 359.8940124511719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 359.01751708984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.9635314941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 360.3077087402344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 359.5740661621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 358.88958740234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 359.2870788574219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 358.9264831542969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 360.0824890136719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 358.6903991699219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 360.5784606933594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 358.8908386230469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 359.1134033203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 358.8613586425781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 360.3231201171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 360.17926025390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 359.03656005859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 359.1079406738281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 359.1905822753906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 358.7342224121094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 359.337158203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 359.34100341796875
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 4 19.0 911.133954163 (10.236991269871366, 11)
loss 359.1925964355469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 360.1891174316406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 359.5489196777344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 360.0531921386719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 359.8362121582031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 358.9123229980469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 359.0048828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 358.9660339355469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 358.9674377441406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 359.072265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 359.443603515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 359.0407409667969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 359.86712646484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 358.8455810546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 358.78839111328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 359.6235656738281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 359.26385498046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 358.7814636230469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 359.0672302246094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 358.7239990234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 358.85528564453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 358.96551513671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 358.7840576171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 358.9136962890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 359.783203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 360.3687438964844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 359.3582458496094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 359.674560546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 358.6862487792969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 359.2138366699219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 359.4378356933594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 358.83013916015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 359.43988037109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 359.8616027832031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 358.89776611328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 358.77081298828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 359.7394104003906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 358.9580078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 359.323974609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 358.75927734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 359.3326721191406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 359.57720947265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 359.1371765136719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 359.3110656738281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 359.18096923828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 359.23333740234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 359.4698181152344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 359.1026916503906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 359.0586242675781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 359.4812927246094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 358.9768981933594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 359.0533142089844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.7860107421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 358.8406982421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 359.3162536621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.7242126464844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 359.7126159667969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 359.3213806152344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 359.4212341308594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 358.6159973144531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 358.94171142578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 358.6625671386719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 358.5334777832031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 358.6859436035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 358.98516845703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 358.9303283691406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 358.9153747558594
############ Running episode number: 447  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.8527526855469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 358.84747314453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 358.9605407714844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 360.1711120605469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 359.8540954589844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 359.2757568359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 359.1002502441406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 359.3767395019531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 359.1743469238281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 358.9410095214844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 358.9151306152344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 359.36590576171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 359.1973876953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 359.1510925292969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 359.0656433105469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 359.2331848144531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 358.9204406738281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 360.1773376464844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 358.91363525390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 358.92852783203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 359.2249755859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 359.18035888671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 360.6691589355469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 359.01824951171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 358.8194580078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 359.15997314453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 358.8724365234375
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 0 21.0 912.494916918 (10.278181486298042, 9)
loss 359.1899108886719
Current State,action,reward,Response time,Next State:  (9, 10.278181486298042) 3 20.0 961.129617982 (10.268274366284802, 10)
loss 359.7889709472656
Current State,action,reward,Response time,Next State:  (10, 10.268274366284802) 3 19.0 922.331700166 (10.335411397720526, 11)
loss 358.8609313964844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 359.2391357421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 358.95574951171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 359.0428466796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 358.9567565917969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 358.6618957519531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 359.1423645019531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 359.8648986816406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.89483642578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 359.83502197265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 358.9067077636719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 359.2614440917969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 359.17388916015625
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 1 20.0 931.154858096 (10.624473674922116, 10)
loss 359.5181579589844
Current State,action,reward,Response time,Next State:  (10, 10.624473674922116) 3 19.0 941.225969064 (10.771376986314287, 11)
loss 359.27252197265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 359.3164367675781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 359.4658508300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 359.25653076171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 359.32098388671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 360.2484130859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 358.9466857910156
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 4 19.0 1012.73322757 (12.501496275411796, 11)
loss 359.6689147949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 359.2287902832031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 359.7867431640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 359.5989685058594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 359.6585693359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 360.6097106933594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 360.3357849121094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 359.125732421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 358.9413757324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 359.84454345703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 358.75897216796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 359.63787841796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 359.79876708984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 359.36199951171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 359.39862060546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 358.58477783203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 359.5337219238281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 359.8652648925781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 358.7286071777344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 359.1590270996094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 359.4855041503906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 359.71368408203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 358.86016845703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 359.63714599609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 359.7408447265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 359.27362060546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 359.01544189453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 360.0283203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 359.6033935546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 359.2344055175781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 358.7962951660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 359.6613464355469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 359.8290100097656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 358.8005065917969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 359.3338928222656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.7348327636719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 358.9647216796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 360.24810791015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.92828369140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 359.5434875488281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 360.6238708496094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 359.11541748046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 359.1378173828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 359.2044982910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 358.8893127441406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 358.7613525390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 358.89093017578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 358.8905944824219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 359.49639892578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 359.8612365722656
############ Running episode number: 448  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 359.788818359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 359.3926086425781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 360.5892333984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 359.19561767578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 359.7358093261719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 359.1809997558594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 358.58544921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 358.9869689941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 359.1319274902344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 359.2375793457031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 359.0241394042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 359.0939025878906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 359.308837890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 359.7978515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 359.6968688964844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 360.5045166015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 359.0224304199219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 358.7948913574219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 360.2747802734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 359.46087646484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 359.39373779296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 358.92431640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 358.7912902832031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 360.1385498046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 359.1838684082031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 358.90423583984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 360.197998046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 360.135986328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 359.06585693359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 359.606689453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 358.81201171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 358.9269104003906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 360.3915710449219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 359.930419921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 359.3313293457031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 359.718994140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 358.76544189453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.7369079589844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 358.8616027832031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 359.1883850097656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 358.9978332519531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 359.0957336425781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 358.8203430175781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 359.95733642578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 359.4124450683594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 359.4452819824219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 358.8329772949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 359.0269470214844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 359.3338928222656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 360.1142272949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 359.62738037109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 358.930419921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 359.3658752441406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 358.9623107910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 359.2361145019531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 359.2787780761719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 359.3739318847656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 358.81719970703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 359.2666015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 359.18096923828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 359.1308898925781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 358.761474609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 359.6882019042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 359.1725769042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 359.450439453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 359.0777587890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 358.8356628417969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 360.15679931640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 359.0711975097656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 359.45611572265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 358.904541015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 360.26983642578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 358.9269104003906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 358.9410095214844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 359.0512390136719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 358.9258728027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 359.9935607910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 359.6856384277344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 359.3021240234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 358.7629089355469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 359.04510498046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 358.9732360839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 359.5888366699219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 359.23736572265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 359.252685546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.8072814941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 359.476318359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 359.28436279296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 360.0559387207031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 361.1547546386719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 359.3440856933594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 358.807373046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 359.1519470214844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 359.2036437988281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 359.21124267578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 358.9854431152344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 359.4349670410156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 359.89923095703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 358.999755859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 360.0047302246094
############ Running episode number: 449  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 359.36444091796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 359.3467712402344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 359.7147521972656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 359.2161560058594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 360.1277770996094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 359.2966003417969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 359.45361328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 359.0516052246094
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 0 21.0 949.137050055 (10.931193889570471, 9)
loss 359.154052734375
Current State,action,reward,Response time,Next State:  (9, 10.931193889570471) 3 20.0 995.311594677 (10.816918347608043, 10)
loss 358.8274230957031
Current State,action,reward,Response time,Next State:  (10, 10.816918347608043) 3 19.0 951.434021987 (10.819208572963639, 11)
loss 360.2601623535156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 360.30157470703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 358.9073181152344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 359.3565979003906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 360.58660888671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 359.77935791015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 360.5667724609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 359.1462097167969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 359.2056884765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 359.2191467285156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 359.58245849609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 359.3122863769531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 359.5444641113281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 358.8280029296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 361.4587097167969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 359.2835693359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 360.0413818359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 360.2928161621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 359.67877197265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 359.41912841796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 359.152587890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 359.787109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 358.9048156738281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 359.6398010253906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 359.7562255859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 359.8222351074219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 360.3033447265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 359.7057189941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 359.31121826171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 359.2365417480469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 359.3111267089844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 359.0816345214844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 359.3258972167969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 360.6260986328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 359.371337890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 359.027587890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 360.01739501953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 359.1603088378906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 359.201904296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 358.8403625488281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 359.3627624511719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 359.19476318359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 358.8597717285156
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 0 21.0 1089.37925646 (14.283719188889453, 9)
loss 360.0791320800781
Current State,action,reward,Response time,Next State:  (9, 14.283719188889453) 3 20.0 1170.79974938 (14.677479537099185, 10)
loss 359.6998291015625
Current State,action,reward,Response time,Next State:  (10, 14.677479537099185) 3 19.0 1156.21398489 (15.353965082180355, 11)
loss 360.0718078613281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 360.483154296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 360.05841064453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 358.8086853027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 359.78765869140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 358.7404479980469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 358.9803161621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 359.3124084472656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 358.72344970703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 359.29327392578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 360.39788818359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 358.70367431640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 359.04302978515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 360.11773681640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 359.42333984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 359.0186767578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 359.9217834472656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 359.58837890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 359.953369140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 359.9962463378906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 360.71087646484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 359.3761901855469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 359.7573547363281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 359.98321533203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 361.1336975097656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 358.7738342285156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 359.03302001953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 359.2489013671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 359.158935546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 358.9874267578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.7594909667969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 360.3983459472656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 359.7445068359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 359.0806884765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 360.0722961425781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 359.62384033203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 359.6551513671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 359.2264404296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 359.891845703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 359.3876037597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 358.6352844238281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 359.275146484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 360.3341979980469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 359.24237060546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 359.1247253417969
############ Running episode number: 450  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.9156494140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 359.32611083984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 359.1169128417969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 358.7140808105469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 360.0714416503906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 359.76275634765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 360.2639465332031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 360.20501708984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 359.8282470703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 359.1089172363281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 359.30865478515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 358.84576416015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 359.8225402832031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 359.7492980957031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 359.6595764160156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 360.2247009277344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 359.54974365234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 359.29840087890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 360.2474670410156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 358.99237060546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 359.1889953613281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 359.3569641113281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 359.2013854980469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 359.1445617675781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 359.4920349121094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 359.6206970214844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 358.99810791015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 359.9456481933594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 359.0947570800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 361.3045959472656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 358.91156005859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 361.529541015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 361.2048645019531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 359.2033996582031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 359.4988098144531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 360.6874084472656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 358.87451171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 360.1700134277344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 360.0204162597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 358.86767578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 360.3890380859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 359.3735046386719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 359.08392333984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 359.06787109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 359.1510314941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 358.78936767578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 359.7822570800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 360.5747985839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 359.8866271972656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 360.080078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 359.1933898925781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 358.8761291503906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 358.984130859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 358.68280029296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 359.6880187988281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 359.76849365234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 358.9090576171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 359.69757080078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 358.84991455078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 358.9329528808594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 359.73370361328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 359.64031982421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 360.6408386230469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 359.5583801269531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 359.33770751953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 358.802490234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 360.08203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 359.1145935058594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 358.8955383300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 359.5287780761719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 361.5926513671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 359.68829345703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 359.43994140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 359.5625305175781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 360.5355529785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 359.272705078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 359.4362487792969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 359.0472106933594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 360.1080322265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 360.396728515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 358.6628112792969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 358.8929138183594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 361.0202331542969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 359.7655944824219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 359.09808349609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 359.2826843261719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 359.7485656738281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 359.75994873046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 359.7918395996094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 359.2242431640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 359.2929382324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 359.84417724609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 359.5173034667969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 360.389404296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 358.9395751953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 359.4345397949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 358.7502746582031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 359.01995849609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 359.02569580078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 360.1647644042969
############ Running episode number: 451  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.8782043457031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 358.64801025390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 359.98187255859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 359.6640319824219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 359.6932373046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 361.1220703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 358.7923583984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 358.9859619140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 359.2933044433594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 359.0636291503906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 358.8271179199219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 358.9747314453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 359.8909912109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.9611511230469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 359.4042663574219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 360.1151428222656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 359.2980651855469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 359.2290344238281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 359.6197814941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 359.1091003417969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 359.2667236328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 359.8452453613281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 360.0549011230469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 360.05072021484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 359.2071533203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 359.75537109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 358.9602355957031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 359.15185546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 359.3171691894531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 359.8951110839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 360.13885498046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 360.01776123046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 359.3765869140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 360.06768798828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 358.7713928222656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 360.75628662109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 359.20489501953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.8416442871094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 359.3391418457031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 359.1640319824219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 358.9330749511719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 358.6253967285156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 360.3613586425781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 359.5530700683594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 359.2978210449219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 359.7677917480469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 359.6544189453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 359.7599792480469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 359.2390441894531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 359.41497802734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 359.3152770996094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 360.6597900390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 359.24017333984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 360.1024169921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 359.9527282714844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 361.3044128417969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 358.93353271484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 359.26934814453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 359.7809753417969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 359.7834167480469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 360.1335754394531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 360.2236022949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 359.9885559082031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 359.4128723144531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 359.4598693847656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 359.1280517578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 358.9304504394531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 359.37176513671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 359.7778625488281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 358.9580993652344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 358.69293212890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 360.1256408691406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 359.3128967285156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 359.10028076171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 359.38775634765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 359.71124267578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 359.6643371582031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 359.27996826171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 359.4578857421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 359.35333251953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 359.64044189453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 359.7217712402344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 360.00518798828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 359.65521240234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 359.2342834472656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 359.27679443359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 359.759521484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 360.1199645996094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 359.9005126953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 358.8117980957031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 361.461181640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 359.5972595214844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 358.98638916015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 359.1746520996094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 358.87640380859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 359.4075622558594
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 4 19.0 1248.87152463 (16.836383524612351, 11)
loss 359.300048828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 358.75543212890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 360.06219482421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 359.01568603515625
############ Running episode number: 452  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 359.39398193359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 359.34759521484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 359.1708984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 359.1104736328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 359.7581481933594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 360.1291198730469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 360.0285949707031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 358.577392578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 360.0993957519531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 359.4732971191406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 359.6604919433594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 359.66607666015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 358.7573547363281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.8840637207031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 359.35162353515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 358.9040832519531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 358.98883056640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 359.03076171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 359.5631103515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 360.1872253417969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 360.8123779296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 359.5396728515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 358.8727111816406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 358.63818359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 359.3931884765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 358.9971618652344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 359.30078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 358.86187744140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 358.8312072753906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 359.091064453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 359.92669677734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 359.04412841796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 360.70166015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 359.8003845214844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 359.2802429199219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 359.6028747558594
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 4 19.0 916.069372847 (10.316955310454549, 11)
loss 359.1402587890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.97869873046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 359.32513427734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 360.4388122558594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 359.5658874511719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 359.8438415527344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 360.0458679199219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 359.2807312011719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 358.78729248046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 359.7414245605469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 359.0286560058594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 359.56475830078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 358.9900817871094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 361.1510009765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 359.24432373046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 359.3032531738281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 359.53656005859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 359.0082702636719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 359.1118469238281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 360.9161682128906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 359.9910888671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 359.0550842285156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 359.1455078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 359.74676513671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 360.0989685058594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 360.109619140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 358.9344787597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 359.0482177734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 359.3252868652344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 360.0397033691406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 359.73553466796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 359.30804443359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 360.4742431640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 359.34014892578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 358.9911804199219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 358.922607421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 359.16455078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 359.5113220214844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 359.186767578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 359.6051330566406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 360.0126953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 359.4639892578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 359.5247497558594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 359.0071105957031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 359.3150939941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 359.6767578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 359.35992431640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 359.00689697265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 360.0268249511719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 359.4755859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 360.1935119628906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 359.7625427246094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 360.0867919921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 358.9530334472656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 358.8836364746094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 358.9444580078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 359.64947509765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 359.572265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 359.4940185546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 358.8417663574219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 358.8811340332031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 361.7405700683594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 359.6274719238281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 359.1465148925781
############ Running episode number: 453  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 359.28314208984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 360.26861572265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 360.9834289550781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 359.0152282714844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 359.0877990722656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 358.813720703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 358.99774169921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 359.9179382324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 360.1461486816406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 359.057373046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 358.8062438964844
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 1 20.0 939.812260006 (10.768325938188134, 10)
loss 359.2374267578125
Current State,action,reward,Response time,Next State:  (10, 10.768325938188134) 3 19.0 948.856481751 (10.772009508959538, 11)
loss 359.3968505859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 361.4151611328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 360.1745300292969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 359.2647399902344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 359.0810546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 358.8186950683594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 360.8168029785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 359.3679504394531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 359.6671142578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 359.72833251953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 359.5892639160156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 359.16619873046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 359.28924560546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 360.1475524902344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 359.2080078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 359.86761474609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 359.27490234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 359.9311218261719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 358.8853454589844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 359.1631774902344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 360.11004638671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 360.5309143066406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 359.81854248046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 359.1423645019531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 359.0159912109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 359.09161376953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 359.6522521972656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 359.412353515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 359.38134765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 360.515380859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 358.8070068359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 358.9767761230469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 359.08660888671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 359.0255432128906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 358.8362731933594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 359.5138854980469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 359.25274658203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 358.7294616699219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 361.4893798828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 359.5220947265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 359.9153137207031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 358.7675476074219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 359.78363037109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 359.5888366699219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 359.28759765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 360.06103515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 359.26361083984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 360.14111328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 359.1126403808594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 358.9754943847656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 358.8499450683594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 359.63824462890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 358.8688049316406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 359.3737487792969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 358.7397766113281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 359.7729797363281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 358.8726806640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 358.7474365234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 359.8373107910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 359.4073181152344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 359.3345642089844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 359.07696533203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 359.1419372558594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 359.644287109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 359.1445007324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 358.9084777832031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 360.21136474609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 359.2861022949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 359.0299072265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 359.0287780761719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 361.87066650390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 359.2200012207031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 359.42633056640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 359.0411376953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 359.003173828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 359.40032958984375
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 1 20.0 1211.18755114 (16.004586266677634, 10)
loss 358.71826171875
Current State,action,reward,Response time,Next State:  (10, 16.004586266677634) 3 19.0 1226.60915635 (16.017694914042416, 11)
loss 360.7078857421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 360.8622741699219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 360.27001953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 358.89971923828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 359.0506286621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 359.02630615234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 358.7204895019531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 359.3282775878906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 359.2557067871094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 358.9840087890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 359.9729309082031
############ Running episode number: 454  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.8141784667969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 359.8171691894531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 358.9900817871094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 359.42523193359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 360.5644836425781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 359.27215576171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 358.8429870605469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 358.8251037597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 359.7265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 359.35076904296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 358.66754150390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 360.19696044921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 359.6556701660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 359.0492248535156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 360.06134033203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 358.5788269042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 359.2845764160156
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 2 19.0 925.789969445 (10.489125480251131, 11)
loss 359.35455322265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 360.35162353515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 360.61090087890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 359.57171630859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 358.95751953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 358.9690856933594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 359.6609191894531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 359.3452453613281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 359.210205078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 359.17510986328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 359.6741943359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 360.35467529296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 359.688232421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 359.33843994140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 358.95220947265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 360.2344055175781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 359.2503967285156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 358.6546630859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 358.8815002441406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 359.6141357421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 359.8359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 359.765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 358.7880859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 359.59759521484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 359.61065673828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 361.1577453613281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 360.0956726074219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 358.9151306152344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 359.31610107421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 359.1138916015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 359.9969482421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 359.3906555175781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 359.2513122558594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 360.9611511230469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 359.3719177246094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 359.0644836425781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 359.1894836425781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 359.0458984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 359.6506042480469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 359.3574523925781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 358.891845703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 359.27264404296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 359.33349609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 358.9920959472656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 358.97430419921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 359.2852783203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 359.4433898925781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 358.8555603027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 358.72314453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 359.7151794433594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 359.1705627441406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 359.08245849609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 360.00213623046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 360.12353515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 359.1572265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 359.3153381347656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 361.20733642578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 359.2713317871094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 359.1955871582031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 360.1466979980469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 358.98974609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 361.0046081542969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 359.17437744140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 360.16400146484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 359.08441162109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 359.0041198730469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 359.373046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 359.3293762207031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 360.22393798828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 359.8150634765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 360.2882080078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 360.31744384765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 359.00262451171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 359.30859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 359.6839294433594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 360.00885009765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 359.4094543457031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 360.4007263183594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 358.81915283203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 359.0035400390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 359.8708801269531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 359.2229309082031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 359.3996887207031
############ Running episode number: 455  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 360.5765380859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 360.7652282714844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 359.7743225097656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 359.3146667480469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 359.3760070800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 359.5696105957031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 358.8637390136719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 359.3399353027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 359.5884704589844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 359.6159362792969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 358.84539794921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 359.3804931640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 358.94659423828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.72015380859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 359.08636474609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 359.6871032714844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 360.19757080078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 359.54046630859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 358.7420654296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 359.0729675292969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 358.7553405761719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 359.1023254394531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 358.97979736328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 359.3253479003906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 358.7841491699219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 359.4422607421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 358.9752502441406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 358.7672424316406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 359.6376953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 359.5563049316406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 358.69818115234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 359.6062316894531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 358.9580993652344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 359.10174560546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 359.396240234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 359.6979064941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 359.4751892089844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 359.95428466796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 359.6083984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 359.0458984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 359.3116149902344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 358.7682189941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 359.1782531738281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 359.6251220703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 358.8472900390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 359.603759765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 359.58013916015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 359.4387512207031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 358.9031982421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 359.874755859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 358.739501953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 359.73004150390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 359.48553466796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 359.7465515136719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 359.78973388671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 361.8362731933594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 358.9193115234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 360.12860107421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 359.63970947265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 359.2370300292969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 359.2828674316406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 359.0411071777344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 358.7534484863281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 360.18096923828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 359.7526550292969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 359.34320068359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 358.9892883300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 359.3567199707031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 359.1618347167969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 359.3760986328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 360.0243225097656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 359.072998046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 359.2301330566406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 359.9213562011719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 358.8302917480469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 359.13006591796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 359.3548583984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 358.957763671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 359.11712646484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 359.5893859863281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 359.0796813964844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 361.0794982910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 360.07421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 358.87713623046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 359.8548889160156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 359.2139892578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 358.9512939453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 359.2113342285156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 360.44342041015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 360.4346923828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 360.30340576171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 358.9541015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 358.6444091796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 359.3048400878906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 358.9914855957031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 359.3006286621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 360.0711975097656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 358.7608642578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 359.1056213378906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 360.105224609375
############ Running episode number: 456  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 360.25634765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 358.87445068359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 359.7311706542969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 359.2339782714844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 359.6022644042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 359.8870544433594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 359.27301025390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 359.47039794921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 359.7591552734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 359.1315002441406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 359.139404296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 359.91937255859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 359.0707092285156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 359.3436584472656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 358.808837890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 359.6855163574219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 358.8345031738281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 358.6728515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 360.275634765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 359.64459228515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 359.97021484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 358.8954162597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 358.9458312988281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 359.7245178222656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 359.83013916015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 360.0204772949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 359.37957763671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 359.2945251464844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 359.42547607421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 359.5240783691406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 359.1479797363281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 359.3076477050781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 359.4727478027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 359.6107177734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 358.7551574707031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 359.05535888671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 358.8086242675781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 359.33984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 359.4540710449219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 359.5251159667969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 360.1336975097656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 359.0585021972656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 359.2501220703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 359.3200378417969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 358.8263244628906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 360.4553527832031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 358.921142578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 359.31390380859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 358.810302734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 358.8376159667969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 359.6502685546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 359.5229187011719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 358.93853759765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 359.825927734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 359.34039306640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 359.0047912597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 358.99420166015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 359.68072509765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 358.77294921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 359.8091735839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 359.1373291015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 359.416259765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 358.9465026855469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 359.80340576171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 358.98077392578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 358.7710876464844
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 0 21.0 1387.23260634 (19.340464848017284, 9)
loss 358.96282958984375
Current State,action,reward,Response time,Next State:  (9, 19.340464848017284) 3 20.0 1435.4954296 (19.213467265587269, 10)
loss 358.8182067871094
Current State,action,reward,Response time,Next State:  (10, 19.213467265587269) 3 19.0 1396.82133527 (19.140765783401285, 11)
loss 359.2096252441406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 358.7515869140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 359.981689453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 359.1493835449219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 358.7690124511719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 359.65399169921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 359.0775146484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 360.17950439453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 359.1268005371094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 359.7976379394531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.9301452636719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 359.49444580078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 359.6822204589844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 359.08367919921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 360.0350341796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 358.9418640136719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 360.12493896484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.97161865234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 359.3150939941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.7728576660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 359.1269836425781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 358.8900146484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 358.73846435546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 358.7801513671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 358.60107421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 359.07476806640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 360.6394958496094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 359.86822509765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 359.0610046386719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 359.74609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 358.930908203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 360.1292724609375
############ Running episode number: 457  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.71875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 359.9171142578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 358.8997497558594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 359.8898620605469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 358.7906188964844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 359.13763427734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 361.0030212402344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 359.8558654785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 359.10247802734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 359.2679443359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 358.7425537109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 360.0879211425781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 360.21221923828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.8505554199219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 360.08642578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 358.8459777832031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 358.71368408203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 360.05731201171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 360.1334533691406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 360.47784423828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 359.176513671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 359.63189697265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 359.27056884765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 359.6017761230469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 359.64154052734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 360.31976318359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 359.4748229980469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 359.4825744628906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 360.0968322753906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 358.94708251953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 358.8450927734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 359.7055358886719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 359.16827392578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 359.0138854980469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 359.05010986328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 359.8912658691406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 358.8528747558594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 359.5234680175781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 358.8003234863281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 359.3293151855469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 359.20648193359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 358.8274230957031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 359.711181640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 360.28692626953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 358.90924072265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 359.4700927734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 358.7810363769531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 359.5612487792969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 358.8880920410156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 358.9939270019531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 360.10211181640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 359.59521484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 359.63470458984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 359.3712463378906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 361.3417053222656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 358.912353515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 359.28070068359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 360.0650634765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 359.77484130859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 359.0774841308594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 358.8512878417969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 358.890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 358.9391784667969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 359.04644775390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 358.911865234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 359.32354736328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 359.75177001953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 359.5119323730469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 359.0730285644531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 358.8380432128906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 359.2685241699219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 360.1319580078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 359.3116149902344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 359.1736755371094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 359.03070068359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 358.96697998046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 359.32452392578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 360.56219482421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 359.2577819824219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 359.58709716796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 358.91986083984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 359.76800537109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 359.5634460449219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 360.2886962890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 359.0951232910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.9958801269531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 359.28955078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 359.3096923828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.70489501953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 360.9107971191406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 358.817138671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 358.9874572753906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 359.27593994140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 359.90869140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 359.4015808105469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 358.98712158203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 359.0428466796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 359.76104736328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 359.4378662109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 359.143310546875
############ Running episode number: 458  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 359.2960510253906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 359.1595764160156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 358.7662658691406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 359.4273681640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 359.17529296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 358.95574951171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 359.1114196777344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 359.7017822265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 359.03826904296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 359.5464172363281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 359.94207763671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 358.9448547363281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 360.1297302246094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 359.7626953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 358.8134765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 359.5775146484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 359.81768798828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 359.57684326171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 360.0976867675781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 359.75115966796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 360.75238037109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 359.79559326171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 359.7290344238281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 359.1536560058594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 358.94390869140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 359.4391174316406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 359.712646484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 359.4468994140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 359.3273620605469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 359.30462646484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 358.76409912109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 359.0960693359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 359.4001159667969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 358.77899169921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 359.19012451171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 359.75396728515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 358.9876708984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 360.00274658203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 359.2043762207031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 359.87005615234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 358.96673583984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 359.86700439453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 359.0485534667969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 359.9844970703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 359.8100280761719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 359.3968811035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 359.6834716796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 359.0419006347656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 359.6781311035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 360.77001953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 358.79351806640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 360.1033935546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 358.890869140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 359.8685302734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 358.88238525390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 359.240234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 358.9136047363281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 359.0405578613281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 358.8084411621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 359.8404541015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 359.3879699707031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 358.8970642089844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 358.830810546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 360.29193115234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 358.82208251953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 359.15216064453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 359.6773986816406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 359.1328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 358.9786071777344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 359.3204650878906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 359.30804443359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 359.0906066894531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 359.357177734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 359.85736083984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 359.4181823730469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 359.07659912109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 359.285888671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 359.96588134765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 360.22320556640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 360.087158203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 359.7180480957031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 358.6216125488281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 360.25714111328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 358.6458435058594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 358.8655090332031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.9953918457031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 360.3338928222656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 359.0216064453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.8858947753906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 359.6596984863281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 358.8116149902344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 359.9532775878906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 359.7753601074219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 360.1986999511719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 358.99078369140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 359.9031677246094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 359.95343017578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 359.2261962890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 360.19793701171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 359.3509521484375
############ Running episode number: 459  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.74151611328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 358.6050720214844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 358.9089050292969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 359.06732177734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 359.2926025390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 358.9480285644531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 359.0568542480469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 358.98187255859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 359.0532531738281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 360.8676452636719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 359.8742370605469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 358.87994384765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 360.1587829589844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 359.2698059082031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 358.8822326660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 358.94110107421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 358.81903076171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 358.8907470703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 359.3196105957031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 358.9876403808594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 359.3437194824219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 358.7230529785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 359.1654052734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 359.0862731933594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 358.93487548828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 359.8953552246094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 358.6205749511719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 358.8720703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 359.341064453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 360.2132873535156
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 2 19.0 914.247384359 (10.305649118067803, 11)
loss 358.92755126953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 359.1932067871094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 358.83624267578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 360.4064636230469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 358.8016662597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 358.8083190917969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 359.0795593261719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.9605407714844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 358.75115966796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 359.37615966796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 359.2047424316406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 359.33477783203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 358.9220275878906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 360.3053894042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 359.5779724121094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 359.0843200683594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 359.8744201660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 359.2272644042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 359.0800476074219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 359.1028137207031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 359.53179931640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 358.8177795410156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 359.6446228027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 359.2784729003906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 358.9344787597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 358.6264953613281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 359.4410705566406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 359.9801330566406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 358.7947692871094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 359.7101745605469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 359.312744140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 358.9259338378906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 359.4274597167969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 359.2772521972656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 359.30621337890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 358.9555358886719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 359.254150390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 358.93731689453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 359.12109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 359.95391845703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 359.1588134765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 359.9280700683594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 359.8025817871094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 358.8792419433594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 359.0702209472656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 358.9060974121094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 358.7411193847656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 359.103515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.959716796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 359.29156494140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 358.6667785644531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 359.0386962890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 358.9842529296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 359.1355285644531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 359.3114318847656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.8253173828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 358.91094970703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 359.4277038574219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.7756042480469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 359.0721740722656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 358.99334716796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 359.9500732421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 359.0025634765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 359.4273376464844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 358.9268493652344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 359.971435546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 358.8158874511719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 358.8495178222656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 359.00946044921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 358.79632568359375
############ Running episode number: 460  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 359.47125244140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 358.88128662109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 359.5693054199219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 359.32794189453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 359.0584411621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 359.3269348144531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 359.3542785644531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 358.8822937011719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 358.65106201171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 359.4720764160156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 359.13543701171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 359.3935241699219
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 2 19.0 937.12351295 (10.772009508959538, 11)
loss 359.06201171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 359.0352478027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 359.00164794921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 358.852294921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 358.6778869628906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 358.96234130859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 359.6876525878906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 359.24359130859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 358.7555236816406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 359.1121826171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 358.9293518066406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 358.8523864746094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 358.8546142578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 359.25726318359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 358.8559265136719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 359.2724914550781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 359.16217041015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 359.1545715332031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 359.2715148925781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 359.4422302246094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 359.29144287109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 358.8533630371094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 358.9126892089844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 358.7525329589844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 360.0089416503906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 359.4732666015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 359.3484802246094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 359.3817443847656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 359.1234130859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 358.7710876464844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 358.9670715332031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 358.9349060058594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 359.1231384277344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 359.3313293457031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 360.3658752441406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 358.8169250488281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 359.1334228515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 358.6649475097656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 359.84283447265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 358.9211730957031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 360.1433410644531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 359.0601806640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 359.2330322265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 359.044677734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 358.902099609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 359.0042724609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 359.54656982421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 358.94915771484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 358.78662109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 358.72601318359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 359.5063781738281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 358.8269958496094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 359.0740051269531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 358.9670104980469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 359.2993469238281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 359.103271484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 360.230712890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 359.3331298828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 359.2240905761719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 358.8145446777344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 358.95355224609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 358.9034118652344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 358.8171081542969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 358.8180236816406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 358.8785095214844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 358.9832458496094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 359.2670593261719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 358.9918518066406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 359.10662841796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 358.88372802734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 358.88616943359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 358.97149658203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 359.1159973144531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 359.39959716796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 358.93243408203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 359.6294860839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 359.1905517578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 359.3069152832031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 359.1810302734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 358.97320556640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 358.8733215332031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 359.08966064453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 359.0391540527344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 359.51580810546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 359.34539794921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 358.84295654296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 359.0825500488281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 358.88775634765625
############ Running episode number: 461  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.76934814453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 359.0854187011719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 360.3268127441406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 359.462890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 359.0647888183594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 358.92181396484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 358.89752197265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 358.87335205078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 359.05157470703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 359.0737609863281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 358.85052490234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 359.0343017578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 359.0262145996094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 359.3783874511719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 358.8443298339844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 358.9822082519531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 358.7574157714844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 358.90533447265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 358.81036376953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 358.83331298828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 359.0891418457031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 358.7783508300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 359.0736999511719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 358.8364562988281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 358.86505126953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 358.7675476074219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 359.4361877441406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 358.8331604003906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 358.98333740234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 358.7919616699219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 358.6966857910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 358.908447265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 359.3092041015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 359.1634216308594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 358.8956298828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 358.6013488769531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 358.7984924316406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 359.32208251953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 358.8817138671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 359.2216491699219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 360.04400634765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 359.00994873046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 359.4750671386719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 359.1201477050781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 358.9053649902344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 358.8799133300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 359.0815734863281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 358.904052734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 359.8619384765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 359.041748046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 359.1837463378906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 359.0246276855469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 358.73626708984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 358.896240234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 359.650634765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 358.9866943359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 358.9001770019531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 359.8947448730469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 359.0404357910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 358.9689025878906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 359.20819091796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 359.007568359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 359.1165466308594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 359.7918701171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 358.7577209472656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 358.8031311035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 358.91925048828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 358.8150939941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 359.3583679199219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 358.8774108886719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 358.5676574707031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 359.08782958984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 359.244873046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 358.8922424316406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 359.0956726074219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 359.6565246582031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 359.8851318359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 358.700439453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.8401794433594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 358.9648742675781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 359.33319091796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 359.31292724609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 358.77813720703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 359.2889099121094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 359.033935546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 359.0150146484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 359.4778747558594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 359.3568420410156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 359.7682800292969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 358.6858825683594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 358.8510437011719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 359.7138366699219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 359.0999450683594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 359.42401123046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 358.8466491699219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 358.9804382324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 359.0377502441406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 358.996337890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 359.1799621582031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 358.8293762207031
############ Running episode number: 462  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 359.1105041503906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 359.08868408203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 358.9309997558594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 359.1946716308594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 359.0097351074219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 359.1297912597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 358.79534912109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 358.81243896484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 358.9187316894531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 359.042724609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 359.4779052734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 359.6404113769531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 358.832275390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 359.0458068847656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 359.1831970214844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 358.94573974609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 359.44830322265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 358.7953796386719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 358.8369445800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 358.8579406738281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 359.1261901855469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 358.7376403808594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 358.64056396484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 359.2775573730469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 358.97735595703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 359.0382995605469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 359.3088073730469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 358.92462158203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 359.1128845214844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 358.9243469238281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 359.8700256347656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 359.2156982421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 359.6725158691406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 359.1151123046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 358.9972839355469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 359.53558349609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 358.9335021972656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.7787170410156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 358.79119873046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 359.2421569824219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 359.8549499511719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 358.86175537109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 359.52142333984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 359.00390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 358.89422607421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 358.76239013671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 358.923095703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 359.70623779296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 359.0360412597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 359.09722900390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 359.60003662109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 358.8940124511719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 358.9766845703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 359.0216369628906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 358.9908142089844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 358.86279296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 359.1680603027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 359.716796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 358.9394836425781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 359.1949462890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 359.61090087890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 359.0648193359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 359.8294982910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 359.03009033203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 359.0087890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 359.0274963378906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 359.9241943359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 359.045654296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 358.8613586425781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 358.9931335449219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 359.84808349609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 358.81207275390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 358.8115539550781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 359.7904357910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 359.45074462890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 358.91314697265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 359.0466613769531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 358.8320617675781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 359.166259765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 358.8133850097656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 358.8480224609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 358.7801818847656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 359.1142272949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 358.9396667480469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 358.9173583984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 359.0397033691406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 358.8345642089844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.9220886230469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 359.2096252441406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 358.69879150390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 359.0128173828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 358.921142578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 359.67279052734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 358.801513671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 359.3408203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 359.6335144042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 359.69940185546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 359.25836181640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 358.8179626464844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 359.22442626953125
############ Running episode number: 463  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.83636474609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 358.9671936035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 359.14776611328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 358.9629821777344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 358.9201965332031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 358.9764709472656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 358.98046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 359.2931213378906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 358.98065185546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 358.88018798828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 358.81640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 358.8683166503906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 358.9789733886719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 359.4935607910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 359.061279296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 359.33746337890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 359.07183837890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 359.0944519042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 359.1153869628906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 359.14501953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 359.1589660644531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 358.8272705078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 360.24127197265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 358.970703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 358.8509216308594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 359.9020690917969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 359.27685546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 359.82672119140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 358.6184387207031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 358.9317321777344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 358.965576171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 359.17889404296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 358.91888427734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 358.8124084472656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 358.8201599121094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 359.2369079589844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 359.1816711425781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.85833740234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 358.7536315917969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 359.0962219238281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 360.2442321777344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 358.6961669921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 359.7488098144531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 359.3244934082031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 360.0311279296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 358.9281921386719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 358.9727783203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 358.8771667480469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 359.39727783203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 358.80206298828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 358.8771667480469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 359.8668518066406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 358.6991882324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 359.5325622558594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 359.2298889160156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 359.95465087890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 358.90655517578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 359.3740234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 358.7478332519531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 358.781982421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 359.0036315917969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 358.7944030761719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 359.13275146484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 358.9458312988281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 359.2115478515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 358.73846435546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 359.4325256347656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 359.1291809082031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 358.9786682128906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 359.1997375488281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 358.63720703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 359.0146484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 358.84619140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 359.120361328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 358.9522399902344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 358.7926025390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 358.90557861328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 358.8108825683594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 359.3094787597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 359.09747314453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 359.1463317871094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 359.2627258300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 358.8181457519531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 359.3860168457031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 359.0787353515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 360.189453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 358.9798889160156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 359.7125549316406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.7874755859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 359.9240417480469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 358.9924011230469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 359.06414794921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 359.0252990722656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 358.96563720703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 358.97650146484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 359.1507873535156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 358.91400146484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 359.0432434082031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 359.21630859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 359.08636474609375
############ Running episode number: 464  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 359.18011474609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 358.9526672363281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 358.89556884765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 358.97003173828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 358.9852294921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 359.9998779296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 358.7757568359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 359.9425354003906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 358.93212890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 358.9694519042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 359.10577392578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 358.96160888671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 358.98248291015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.9834899902344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 358.96405029296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 358.9894104003906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 358.83819580078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 358.8379211425781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 359.35552978515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 359.0058898925781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 359.21502685546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 358.86834716796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 359.3249816894531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 359.0206604003906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 359.653564453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 359.0469970703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 358.9793395996094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 358.8691711425781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 359.7884216308594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 359.29071044921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 358.9669189453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 358.77001953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 358.9580383300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 358.98699951171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 358.97015380859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 359.028076171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 358.74017333984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.8644104003906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 359.2425231933594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 358.82781982421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 358.7963562011719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 359.4279479980469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 359.0045166015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 359.0069885253906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 358.8729553222656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 359.757568359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 359.0388488769531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 359.4378662109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 359.2369384765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 358.77227783203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 358.7768249511719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 359.04840087890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 358.9269104003906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 359.1356201171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 359.09771728515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 358.980712890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 359.1094665527344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 358.984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 358.6777648925781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 359.039794921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 359.0391540527344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 359.2630920410156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 359.0428161621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 358.7701110839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 359.0157775878906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 359.22698974609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 359.69091796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 358.90252685546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 359.335693359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 359.01141357421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 358.6744079589844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 358.9027404785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 358.9561767578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 358.9732666015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 358.8536071777344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 358.7947998046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 358.9924621582031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 360.0423889160156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.83441162109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 359.01544189453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 359.013916015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 358.92498779296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 358.89666748046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 359.0324401855469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 358.8605041503906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.8088684082031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 359.2756652832031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.96502685546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.95745849609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 359.08856201171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 358.8254699707031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 358.8954772949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 359.8670959472656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 358.8467102050781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 359.1822509765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 358.7731018066406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 358.8313293457031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 359.0129089355469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 358.91314697265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 359.2093505859375
############ Running episode number: 465  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.87469482421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 359.81658935546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 359.0364990234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 359.0406188964844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 358.7756042480469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 358.8974609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 358.9232177734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 358.8600769042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 359.03863525390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 358.5924377441406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 359.1556396484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 358.9348449707031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 358.97003173828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 359.2018127441406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 358.92529296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 358.979248046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 358.8256530761719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 359.448974609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 358.9413146972656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 358.92242431640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 359.87628173828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 358.9361267089844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 359.8018493652344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 358.9638366699219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 359.3557434082031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 358.91546630859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 359.14044189453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 359.17852783203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 358.8659973144531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 359.3355712890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 359.16705322265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 358.9145202636719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 359.7510070800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 359.1039733886719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 358.81341552734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 359.7611083984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 359.0710754394531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.794677734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 358.7966613769531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 359.022216796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 358.88983154296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 358.8372497558594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 358.9112854003906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 359.1314697265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 358.86346435546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 358.9924011230469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 358.6377258300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 359.0169982910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 359.0264587402344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 358.88140869140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 358.9233703613281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 359.0882568359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 358.85589599609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 359.579345703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 358.8900146484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 358.8277587890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 358.72454833984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 358.9793395996094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 359.0325622558594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 358.9038391113281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 360.00299072265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 358.7959289550781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 359.0880126953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 358.97113037109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 359.01898193359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 358.6870422363281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 359.13641357421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 359.1055908203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 359.13714599609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 359.0006408691406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 359.1904296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 359.08575439453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 358.9176025390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 359.1114501953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 358.8137512207031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 358.96099853515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 358.82861328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 358.9458923339844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.9731140136719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 359.241455078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 358.9348449707031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 359.07073974609375
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 1 20.0 1189.84140354 (15.446694946204717, 10)
loss 358.9473876953125
Current State,action,reward,Response time,Next State:  (10, 15.446694946204717) 3 19.0 1197.01631782 (15.750501603468638, 11)
loss 359.03497314453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 358.9887390136719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.72186279296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 359.30999755859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 359.562255859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.70599365234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 359.1591491699219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 359.0782775878906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 359.0013427734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 358.84320068359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 360.20648193359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 358.87786865234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 358.70257568359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 359.0129699707031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 359.08746337890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 359.03955078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 358.85064697265625
############ Running episode number: 466  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 359.79638671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 358.60504150390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 358.9747619628906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 358.6962890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 359.04534912109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 359.0271301269531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 359.1846008300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 358.9354553222656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 359.001953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 358.97052001953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 359.06048583984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 358.9012145996094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 358.66021728515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.93341064453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 359.28125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 358.9365234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 358.65948486328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 359.1314392089844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 358.7835693359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 358.94354248046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 359.1549377441406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 358.8528747558594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 359.0455017089844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 358.8400573730469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 358.85760498046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 358.7581481933594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 358.90435791015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 359.67279052734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 359.04180908203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 358.8168640136719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 358.71539306640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 359.07421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 358.988037109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 359.1330871582031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 359.3120422363281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 359.00830078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 359.74560546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.9501037597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 359.0606384277344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 359.2132873535156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 359.10107421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 359.4924011230469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 358.98651123046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 359.2304992675781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 359.17779541015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 359.44097900390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 359.0284729003906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 358.7430114746094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 359.0790710449219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 359.2042541503906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 358.96636962890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 359.0563049316406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 359.4416809082031
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 1 20.0 1089.37925646 (14.283719188889453, 10)
loss 359.5076904296875
Current State,action,reward,Response time,Next State:  (10, 14.283719188889453) 3 19.0 1135.32732476 (14.677479537099185, 11)
loss 359.0697021484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 359.05828857421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 359.1600341796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 359.0827331542969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 359.09405517578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 359.4670715332031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 359.228515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 358.8468017578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 358.89324951171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 358.8194580078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 359.0491943359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 359.18902587890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 359.0589294433594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 359.4186096191406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 358.7889099121094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 359.4835205078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 359.1531982421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 358.72216796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 359.1097106933594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 358.904052734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 358.8001708984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 359.3943176269531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 358.9058532714844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 359.1704406738281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 359.08233642578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 359.2006530761719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 358.9452209472656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 358.9514465332031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 359.08709716796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 359.1813659667969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 359.0787658691406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.9861755371094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 358.966064453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 359.38079833984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.7784118652344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 359.5588073730469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 359.3814697265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 358.98834228515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 359.0101623535156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 359.07513427734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 358.9139709472656
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 1 20.0 1229.17115431 (16.667936385136993, 10)
loss 359.45294189453125
Current State,action,reward,Response time,Next State:  (10, 16.667936385136993) 3 19.0 1261.79596106 (16.836383524612351, 11)
loss 358.94317626953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 358.9208984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 359.29864501953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 358.8658142089844
############ Running episode number: 467  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 359.30218505859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 359.0823059082031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 358.9254455566406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 358.9239196777344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 359.4019775390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 359.20111083984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 359.3558654785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 359.2759094238281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 358.92926025390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 359.2738037109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 358.8326721191406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 358.9067077636719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 358.9574279785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 359.1551818847656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 358.96868896484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 358.9656066894531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 359.20208740234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 358.7498474121094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 359.6714782714844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 359.3015441894531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 359.3984680175781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 358.85693359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 358.93341064453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 358.8782958984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 359.30206298828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 359.3778991699219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 358.98577880859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 358.989990234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 359.35028076171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 358.9889221191406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 358.9735107421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 358.7294921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 359.1560974121094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 359.2565612792969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 359.3876037597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 359.0572814941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 359.02935791015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 359.46820068359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 358.9806823730469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 358.878662109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 358.6847839355469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 359.3640441894531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 359.2842712402344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 358.8085632324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 359.0780944824219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 358.90374755859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 359.27447509765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 358.8037109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 359.1650390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 358.9784851074219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 359.0462341308594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 359.1709289550781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 358.9549560546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 358.8794250488281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 359.1942443847656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 359.26336669921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 359.0936279296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 359.16204833984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 358.9930725097656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 359.3063049316406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 358.8459777832031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 359.09466552734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 359.1705627441406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 359.11126708984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 359.0638732910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 358.8288879394531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 359.1319274902344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 358.7262268066406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 358.9114990234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 358.9759216308594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 358.99957275390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 359.1321716308594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 359.14337158203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 359.0030822753906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 358.8330078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 358.9427795410156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 358.9593200683594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 358.9469299316406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.759033203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 358.7408142089844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 359.1195373535156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 359.7562255859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 358.9222717285156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 358.90960693359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 359.0250549316406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 359.18109130859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 359.0278625488281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.8099365234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 359.52838134765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 359.3773193359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 359.0164489746094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 358.8615417480469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 358.7598876953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 359.2451171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 359.0627746582031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 358.73321533203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 358.726318359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 358.9424133300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 358.9383239746094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 358.7994689941406
############ Running episode number: 468  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.900634765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 359.3017883300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 358.91912841796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 359.0122375488281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 358.7167663574219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 359.4957580566406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 358.9141845703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 359.4080810546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 358.907470703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 358.9654541015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 358.8757019042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 358.9111328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 359.19317626953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 359.0600891113281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 358.783447265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 358.82568359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 359.5373229980469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 359.26312255859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 358.6888427734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 358.8277282714844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 359.00726318359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 359.0193786621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 358.9954528808594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 359.40447998046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 359.1058654785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 358.7250671386719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 359.1767272949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 359.1123046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 359.2641906738281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 358.97650146484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 358.66571044921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 358.87994384765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 359.0451354980469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 359.19775390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 358.77105712890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 358.8923034667969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 358.8752746582031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 359.0968322753906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 359.4446716308594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 358.84381103515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 358.7247314453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 358.9028015136719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 358.9450378417969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 358.9834899902344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 358.9263000488281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 359.3116760253906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 359.7237854003906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 359.0615234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 359.40228271484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 358.8091735839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 358.6100158691406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 358.9544677734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 358.8205261230469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 359.7533264160156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 358.951171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 359.0257873535156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 358.997314453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 359.025634765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 358.9952392578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 359.2008972167969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 359.23663330078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 358.8203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 359.04632568359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 359.484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 359.16204833984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 358.79327392578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 358.9291687011719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 358.8642883300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 358.70184326171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 359.0250244140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 359.4283752441406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 359.36822509765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 359.072021484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 358.9033508300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 358.82452392578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 358.949951171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 359.8351135253906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 359.5143127441406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.9747009277344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 359.3530578613281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 359.0204772949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 358.900390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 358.77716064453125
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 0 21.0 1184.33851965 (15.750501603468638, 9)
loss 358.9432373046875
Current State,action,reward,Response time,Next State:  (9, 15.750501603468638) 3 20.0 1247.57857022 (15.817158911312735, 10)
loss 359.1095886230469
Current State,action,reward,Response time,Next State:  (10, 15.817158911312735) 3 19.0 1216.66724247 (15.829956988360925, 11)
loss 359.1221008300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 358.8758850097656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.9889831542969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.97900390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 359.299560546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 358.8978576660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 359.82464599609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 358.8742980957031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 359.4263610839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 359.0423889160156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 358.7434387207031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 359.1812744140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 359.1024169921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 359.35003662109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 358.7989501953125
############ Running episode number: 469  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 359.4415588378906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 359.0323181152344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 359.49017333984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 358.787353515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 358.732421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 360.5711669921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 358.9414367675781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 359.1855163574219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 358.937744140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 358.75592041015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 358.66986083984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 359.0551452636719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 359.306884765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 359.0329284667969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 359.73602294921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 359.07220458984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 358.9815673828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 358.8677062988281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 358.6967468261719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 359.26251220703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 359.12109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 358.9538269042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 358.99163818359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 359.6351318359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 358.90008544921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 358.8586120605469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 359.6814270019531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 358.75994873046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 358.9058837890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 358.9720764160156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 359.24676513671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 359.1453857421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 359.6781311035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 359.4123840332031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 359.05682373046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 358.8017578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 359.459716796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 359.294189453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 359.78045654296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 358.8453369140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 360.4186706542969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 359.2439270019531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 359.0862731933594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 358.9725341796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 358.9311828613281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 358.80377197265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 359.0228271484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 358.724853515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 358.743896484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 359.0298767089844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 358.8681335449219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 359.0318298339844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 358.5474548339844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 358.98101806640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 359.20068359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 358.6670837402344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 360.1684875488281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 359.0277404785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 358.9309997558594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 359.1307067871094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 359.0075378417969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 359.2462463378906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 359.0808410644531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 359.443603515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 358.82232666015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 360.0538024902344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 359.0848388671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 359.3554382324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 358.9512023925781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 359.4131774902344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 359.73529052734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 359.09649658203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 359.90673828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 358.77923583984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 359.4560852050781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 358.8385009765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 358.8307800292969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 358.7857360839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.9215087890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 358.95599365234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 359.10504150390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 358.90020751953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 359.13671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 359.8992004394531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 359.1307067871094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.9962463378906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 358.7479248046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.9804992675781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 359.062744140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 358.90478515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 359.1475524902344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 359.0231628417969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 359.064208984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 358.9416809082031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 359.0211486816406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 359.03692626953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 359.20587158203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 359.1744079589844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 358.8215026855469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 359.2096252441406
############ Running episode number: 470  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 359.8460693359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 358.7902526855469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 359.0278015136719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 358.8111267089844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 358.7496337890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 358.87823486328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 358.9539794921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 359.0411376953125
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 2 19.0 949.137050055 (10.931193889570471, 11)
loss 359.3260498046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 359.66424560546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 359.6459655761719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 358.873046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 359.85150146484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 359.5057678222656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 359.1375732421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 358.9698486328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 359.8499755859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 359.4648742675781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 358.96697998046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 359.9224853515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 358.9984130859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 358.9489440917969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 359.4723205566406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 358.964599609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 359.2723388671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 359.0482177734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 359.2367248535156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 359.02471923828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 359.1806640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 358.84393310546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 359.06524658203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 359.2999572753906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 359.0578308105469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 358.908203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 359.29425048828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 359.3585510253906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 359.0292053222656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 359.3315734863281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 359.003173828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 359.33197021484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 358.9313049316406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 358.668212890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 358.9570007324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 359.85284423828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 358.9320373535156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 359.0296936035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 358.79071044921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 359.6316223144531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 358.90447998046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 359.0596008300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 360.38934326171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 359.5355529785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 359.0111083984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 358.9568176269531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 359.3053283691406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 358.7694091796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 359.5963439941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 358.8016052246094
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 0 21.0 1238.24711194 (16.871606159345866, 9)
loss 359.0456848144531
Current State,action,reward,Response time,Next State:  (9, 16.871606159345866) 3 20.0 1306.26286107 (17.534967586021782, 10)
loss 359.3093566894531
Current State,action,reward,Response time,Next State:  (10, 17.534967586021782) 3 19.0 1307.78684385 (17.669285735563751, 11)
loss 359.6016845703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 359.1573181152344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 360.3188171386719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 358.87188720703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 358.79498291015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 359.23980712890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 358.63372802734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 359.0050964355469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 359.335205078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 358.9119873046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 359.0865478515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 359.2477722167969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 359.1454162597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 359.00042724609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 359.1832580566406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 359.68701171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 359.50048828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 359.39166259765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 359.1876220703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 358.9914245605469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 360.62908935546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 359.56402587890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 359.7902526855469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 359.20806884765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 358.7881164550781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 359.0163879394531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 359.650390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.86260986328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.9496154785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 359.1724548339844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 359.4280090332031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 359.1723327636719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 359.0577392578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 359.7077331542969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 359.1506652832031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 358.9562683105469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 358.7303466796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 359.1332092285156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 359.9499816894531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 359.10284423828125
############ Running episode number: 471  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 359.4836120605469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 358.9781799316406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 360.2058410644531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 360.1959228515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 358.58709716796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 359.03497314453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 359.0151672363281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 360.0536804199219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 359.0711364746094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 358.9188537597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 359.54388427734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 358.9097595214844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 358.8450622558594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 359.00653076171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 359.2583312988281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 358.7558898925781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 359.1466064453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 359.88861083984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 359.3246154785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 359.1691589355469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 359.70458984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 358.9629211425781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 359.4625549316406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 358.90985107421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 358.8799133300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 358.8564147949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 359.05889892578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 358.6756286621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 359.01947021484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 360.2066345214844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 358.97283935546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 360.4488830566406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 358.8254089355469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 359.4026184082031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 358.8949279785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 359.5335998535156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 358.8229064941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 359.260009765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 358.9018249511719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 359.0659484863281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 359.0239562988281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 359.46685791015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 359.3460998535156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 359.1159362792969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 359.4510498046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 359.2323913574219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 359.4144287109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 359.5003967285156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 359.03314208984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 359.30010986328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 359.8351135253906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 359.05670166015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 358.8367004394531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 358.96990966796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 358.78973388671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 358.81683349609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 358.9305725097656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 358.79736328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 359.01312255859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 359.7186279296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 359.9382019042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 359.0789489746094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 359.0462341308594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 359.621337890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 360.5663146972656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 359.01519775390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 358.762451171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 358.8028259277344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 359.30877685546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 358.867431640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 359.0866394042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 359.55120849609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 358.7890319824219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 358.86041259765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 359.0376892089844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 359.296630859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 358.78875732421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 359.8212890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.79742431640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 359.22650146484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 359.2380676269531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 359.1534729003906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 358.98748779296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 359.2402038574219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 359.314453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.90643310546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 358.6334533691406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.8123779296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 359.0956115722656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 359.3361511230469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 358.95257568359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 359.1031188964844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 358.87677001953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 358.820556640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 359.2779541015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 359.3431701660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 360.0929870605469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 359.0157470703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 359.1990966796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 359.1850891113281
############ Running episode number: 472  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.81951904296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 359.0823059082031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 358.7035217285156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 358.83953857421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 359.26080322265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 359.5819396972656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 359.1310119628906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 359.1567687988281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 360.5696716308594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 358.85650634765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 359.63189697265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 359.08447265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 359.9754638671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 359.24609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 359.3488464355469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 359.0046691894531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 359.1435241699219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 359.4888610839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 358.9417724609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 359.5957946777344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 358.92486572265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 358.94512939453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 359.2475891113281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 359.74029541015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 358.85906982421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 360.12530517578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 359.26629638671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 359.2395324707031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 359.2216796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 359.40069580078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 358.9361267089844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 358.92791748046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 359.11163330078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 358.8605651855469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 358.81494140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 358.9461364746094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 359.80841064453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 359.2242126464844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 358.9967041015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 358.8518371582031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 358.9953918457031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 359.1976318359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 360.5055847167969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 359.54290771484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 359.3317565917969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 358.7876281738281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 359.3291931152344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 360.1223449707031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 358.80810546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 358.7946472167969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 358.7311706542969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 359.70709228515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 358.9635009765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 359.1351318359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 358.9656066894531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 359.5748291015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 358.8428649902344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 359.2657470703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 359.2798767089844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 360.0841064453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 359.523193359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 358.7528076171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 359.738037109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 359.43927001953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 358.7510070800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 358.9810791015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 359.3405456542969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 358.8345642089844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 359.11578369140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 359.69964599609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 359.2049865722656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 358.94281005859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 360.2032165527344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 358.9681701660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 358.8013610839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 359.0442810058594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 358.7771911621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 358.79541015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.93939208984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 359.0018615722656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 359.5888366699219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 358.9190673828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 359.2607421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 359.29510498046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 358.68994140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 360.4087829589844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 359.3372497558594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 359.1418151855469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 359.8435974121094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 359.28460693359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 358.93853759765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 359.2313537597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 359.118408203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 358.91143798828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 359.75909423828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 358.9396057128906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 358.92523193359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 359.8425598144531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 359.3153381347656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 359.64825439453125
############ Running episode number: 473  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.8210144042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 358.8811340332031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 358.85400390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 359.3142395019531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 359.0773010253906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 359.01885986328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 358.5839538574219
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 0 21.0 950.798097136 (10.995673623987257, 9)
loss 359.1490783691406
Current State,action,reward,Response time,Next State:  (9, 10.995673623987257) 3 20.0 998.686790566 (10.931193889570471, 10)
loss 359.30731201171875
Current State,action,reward,Response time,Next State:  (10, 10.931193889570471) 3 19.0 957.495664348 (10.816918347608043, 11)
loss 359.4666442871094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 358.73834228515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 358.9859924316406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 359.0402526855469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 359.21746826171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 361.2906799316406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 359.0574035644531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 359.43634033203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 358.97802734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 359.0343017578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 359.83709716796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 358.9447937011719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 359.4070739746094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 359.326904296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 359.0117492675781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 359.1895446777344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 359.1074523925781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 359.0847473144531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 359.6875915527344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 360.1195068359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 358.9341735839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 359.6605529785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 358.8887023925781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 359.0019226074219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 359.2554016113281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 359.3295593261719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 359.7180480957031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 358.9898681640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 359.742919921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 360.1835632324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 358.669921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 359.8607177734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 359.0564270019531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 359.6129150390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 359.7450256347656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 359.1678161621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 359.1251220703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 358.7547607421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 359.653076171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 359.5325927734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 359.1836242675781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 359.8339538574219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 358.9010925292969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 358.9374694824219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 360.1131591796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 359.1739196777344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 359.508544921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 358.7173156738281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 359.96270751953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 358.7839660644531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 359.51873779296875
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 2 19.0 1294.6873044 (17.669285735563751, 11)
loss 358.88140869140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 358.92095947265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 359.3083801269531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 359.1714172363281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 359.2408447265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 359.48193359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 358.91412353515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 361.3996276855469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 359.1961364746094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 358.9381408691406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 358.9530944824219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 360.3122863769531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 359.1574401855469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 358.87310791015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 359.15325927734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 359.02459716796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 359.1162414550781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 359.12274169921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 359.2402038574219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 359.1206359863281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 360.99981689453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 359.6695861816406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 359.76275634765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 360.02581787109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 358.99334716796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 360.068115234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 358.74920654296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.8428039550781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 359.9645690917969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 358.7958068847656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 359.2928771972656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 358.994140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 359.32769775390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 358.77618408203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 358.749755859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 359.5457763671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 359.69757080078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 358.8535461425781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 358.9129638671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 359.03875732421875
############ Running episode number: 474  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 359.34185791015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 358.8997802734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 359.6766357421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 358.8282470703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 360.1799621582031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 358.84759521484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 360.0970458984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 359.1435546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 359.25811767578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 358.9705810546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 358.8133239746094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 359.5518493652344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 359.3127136230469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 359.7363586425781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 359.1305236816406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 358.7743225097656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 359.52276611328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 359.1108093261719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 359.68353271484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 360.2044372558594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 359.7354431152344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 358.7626953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 360.5097961425781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 359.4602966308594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 358.74273681640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 359.6418762207031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 359.0984191894531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 358.96807861328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 359.2761535644531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 359.7242736816406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 358.7925109863281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 359.1070861816406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 359.33087158203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 359.2599182128906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 360.3497314453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 359.93994140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 359.3409729003906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 359.3940734863281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 359.73406982421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 359.21356201171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 358.7835388183594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 359.4221496582031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 359.3763427734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 359.3936462402344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 358.9656982421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 359.5565490722656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 358.9010925292969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 359.6833190917969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 359.2027282714844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 358.7010498046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 359.774169921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 358.7552795410156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 360.4332275390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 359.3885192871094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 359.4066467285156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 359.0254821777344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 359.1352844238281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 359.4645690917969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 358.9967041015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 359.034423828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 359.9552001953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 359.6610412597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 359.2220153808594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 359.515869140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 358.8525695800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 359.4363708496094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 358.7390441894531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 358.9442443847656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 359.3313293457031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 359.3397216796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 359.7403564453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 359.7466735839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 359.9014892578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 359.035888671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 359.5541076660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 359.2525939941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 360.1249084472656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 359.2591857910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 359.95306396484375
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 2 19.0 1226.10492247 (15.950694610794756, 11)
loss 358.8335266113281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 360.02581787109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 358.977294921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 359.59710693359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 361.0966796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 360.041015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.9476623535156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 359.9036560058594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 360.24493408203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.9493103027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 360.0384521484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 359.0991516113281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 359.1676330566406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 359.0805969238281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 359.7518615722656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 358.8510437011719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 359.742919921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 358.85333251953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 359.2698974609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 360.621826171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 359.2614440917969
############ Running episode number: 475  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 359.2099304199219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 359.0140686035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 358.9365234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 358.8444519042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 358.851806640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 358.9872741699219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 358.9250793457031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 359.5115966796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 359.0257873535156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 359.5928039550781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 358.9032287597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 359.7704162597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 359.521240234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.7733459472656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 359.3860778808594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 359.35650634765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 358.9749450683594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 359.5708312988281
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 1 20.0 922.369964659 (10.448897752470936, 10)
loss 359.66375732421875
Current State,action,reward,Response time,Next State:  (10, 10.448897752470936) 3 19.0 931.912703681 (10.433149880183072, 11)
loss 358.9350280761719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 358.7955017089844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 359.37432861328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 360.66485595703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 359.0035705566406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 359.1159362792969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 359.08056640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 359.0753479003906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 359.4690246582031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 359.7218017578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 359.18096923828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 358.9267272949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 359.2840881347656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 359.831787109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 360.9248352050781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 359.9068603515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 359.50689697265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 359.6037902832031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 359.5384521484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 359.9804992675781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 359.88043212890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 359.6858825683594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 359.1783142089844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 359.85369873046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 359.07763671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 358.86529541015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 359.9266357421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 360.0714416503906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 359.1456298828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 359.4975891113281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 358.8089599609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 358.92333984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 360.5507507324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 360.7646484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 359.2074890136719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 358.9939270019531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 359.3355712890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 360.8551025390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 358.91217041015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 359.9220275878906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 359.5153503417969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 359.9098205566406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 359.6368408203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 358.9599914550781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 360.42803955078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 359.7654724121094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 359.05487060546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 359.4482421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 358.94366455078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 359.0027160644531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 359.3355712890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 359.1435852050781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 359.0561218261719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 359.1552734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 360.4866638183594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 358.7255859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 359.11138916015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 359.0592346191406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 359.296630859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 361.23931884765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 361.04931640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 359.2945556640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 359.0970153808594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 360.1313171386719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 359.4179382324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 359.2801208496094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 359.0369873046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 360.04754638671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 359.2804260253906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.77642822265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 360.12469482421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 359.2074890136719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 359.0444030761719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 360.6220397949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 360.3794250488281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 358.84912109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 359.9593811035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 358.9383544921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 359.5609130859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 358.9585266113281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 359.02587890625
############ Running episode number: 476  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.91583251953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 360.6905822753906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 359.0592956542969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 359.4416198730469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 359.4240417480469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 359.9085693359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 358.98748779296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 359.4303283691406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 359.244140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 359.193115234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 359.01568603515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 359.45233154296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 359.51458740234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.9557800292969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 359.83538818359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 358.828857421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 359.38763427734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 359.8116455078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 358.96820068359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 358.8937072753906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 358.9100036621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 358.9361267089844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 358.9945373535156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 359.8743896484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 359.0353698730469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 359.6091613769531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 359.3570556640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 360.3564758300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 358.8972473144531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 359.8141784667969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 360.9858703613281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 359.5621643066406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 361.09771728515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 360.7980651855469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 359.1390686035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 358.8361511230469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 359.683349609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 359.7684631347656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 358.8850402832031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 359.9037780761719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 359.34912109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 358.823486328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 359.9444274902344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 358.8578186035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 358.9253234863281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 359.03387451171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 358.97784423828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 358.8307189941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 359.271728515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 360.6264953613281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 358.88983154296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 358.760009765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 358.88092041015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 359.6361999511719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 359.0378112792969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 359.0378723144531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 358.904541015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 358.9901123046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 359.1137390136719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 359.59088134765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 359.56475830078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 359.1162414550781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 359.5049133300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 360.0533447265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 359.38580322265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 359.0809631347656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 359.8343505859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 358.9900817871094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 358.80853271484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 359.6582336425781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 359.0581359863281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 358.9899597167969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 359.7291259765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 359.2630920410156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 359.4050598144531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 359.4280700683594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 359.193603515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 359.7860412597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 359.1180725097656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 360.19195556640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 359.3030090332031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 358.9496765136719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 359.0779113769531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 359.09490966796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 359.01287841796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 359.2611389160156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 358.9931335449219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 360.7231750488281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 359.33837890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 359.7838134765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 359.6579284667969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 359.02374267578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 360.3283996582031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 360.25604248046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 359.70184326171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 359.3137512207031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 360.3222961425781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 359.7954406738281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 359.2603759765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 359.8341369628906
############ Running episode number: 477  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 359.3383483886719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 359.6529235839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 359.1474914550781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 358.87835693359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 358.7944641113281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 358.767333984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 360.1636657714844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 359.2294006347656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 359.0231018066406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 359.36871337890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 360.43145751953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 359.1563415527344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 359.2014465332031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 360.7115173339844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 359.0645446777344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 359.0962829589844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 359.6372375488281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 358.8747253417969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 359.81170654296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 358.8681640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 359.7180480957031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 358.8763427734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 360.09307861328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 358.93798828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 359.03570556640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 359.091064453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 358.88555908203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 359.590576171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 358.8469543457031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 359.1155090332031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 359.1003723144531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 359.2666015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 358.7771301269531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 358.98089599609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 359.7394714355469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 359.6605529785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 358.8970642089844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 359.0523681640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 359.2437438964844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 360.0477600097656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 358.8134765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 358.98681640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 359.8427429199219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 359.20294189453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 359.6895446777344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 358.9979248046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 358.9598083496094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 359.3761901855469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 360.2381896972656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 359.482421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 358.96649169921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 360.0476989746094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 359.7859191894531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 359.0041809082031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 359.5733337402344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 358.7600402832031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 358.7426452636719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 358.9251708984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 358.8626403808594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 358.9801330566406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 359.8005065917969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 359.81427001953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 360.7386779785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 360.1273498535156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 359.01544189453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 359.05767822265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 358.978271484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 358.85546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 359.9560852050781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 359.8225402832031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 359.21270751953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 359.97552490234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 359.3116149902344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 359.6012268066406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 359.15338134765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 358.8093566894531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 358.8680725097656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 359.4608154296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 359.22772216796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 359.18890380859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 359.3611145019531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 359.8114929199219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 359.13818359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 359.33856201171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 359.39703369140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 359.1911315917969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 358.7841491699219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 359.4329528808594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 359.0843200683594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 359.22625732421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 359.8614501953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 359.0045166015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 359.7823181152344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 358.63385009765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 359.5930480957031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 358.93353271484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 359.0640869140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 358.8411560058594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 358.97161865234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 359.4620056152344
############ Running episode number: 478  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.89166259765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 358.8578186035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 358.9127197265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 359.0224609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 360.1987609863281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 358.9184875488281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 359.45074462890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 359.1003112792969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 359.01220703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 358.7988586425781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 360.51806640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 359.20428466796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 359.69781494140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 359.3439025878906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 358.7254333496094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 359.4134826660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 358.7890930175781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 361.06707763671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 360.1962585449219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 359.0079040527344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 358.91363525390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 358.97955322265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 359.32452392578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 358.8337707519531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 358.90972900390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 360.01934814453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 359.3925476074219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 359.8118591308594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 359.1139221191406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 359.6669006347656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 358.9681091308594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 360.0013427734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 358.8601379394531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 359.08526611328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 359.1062927246094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 359.8275451660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 358.9555969238281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.5415344238281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 360.98028564453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 358.9895324707031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 359.8315734863281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 359.04229736328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 358.9090576171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 359.0830383300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 358.82037353515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 359.1399230957031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 359.2381286621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 358.8689880371094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 359.0096435546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 360.22503662109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 359.4217834472656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 359.48760986328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 359.1023254394531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 358.9182434082031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 360.02471923828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 361.19195556640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 358.8840026855469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 359.2088623046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 358.95123291015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 360.031494140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 358.8699645996094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 360.08642578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 359.7149353027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 359.3368835449219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 359.0280456542969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 358.9133605957031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 359.31976318359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 359.34674072265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 359.2593994140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 359.2187194824219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 359.5189514160156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 359.25103759765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 360.3793640136719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 360.167724609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 358.9190979003906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 359.2818603515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 359.6938171386719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 359.8046569824219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.9790344238281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 359.85467529296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 359.4810791015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 358.730712890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 359.6586608886719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 358.9101257324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 358.9417724609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 359.8211364746094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 358.87530517578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.8996887207031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.869140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 359.03460693359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 359.0400085449219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 358.87310791015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 359.0096130371094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 358.785400390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 359.80853271484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 359.00262451171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 359.6540832519531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 358.8827819824219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 358.8878173828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 358.9226379394531
############ Running episode number: 479  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.9094543457031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 359.1806945800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 359.06463623046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 359.3857727050781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 358.8434143066406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 358.86187744140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 359.1088562011719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 358.75372314453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 359.3271789550781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 360.1130065917969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 358.8430480957031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 360.0666809082031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 359.4420166015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.8685607910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 358.8985290527344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 359.3291015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 358.9814758300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 358.71234130859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 358.9877624511719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 358.897216796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 359.111083984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 359.0462646484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 359.4288330078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 359.44171142578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 358.9371032714844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 358.91046142578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 358.90283203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 359.0938415527344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 358.912109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 359.2799072265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 359.75634765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 359.89276123046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 358.74591064453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 358.8407897949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 359.0359191894531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 358.8806457519531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 358.8373718261719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.7640075683594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 358.865478515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 358.82427978515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 359.4273376464844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 358.76104736328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 358.94775390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 359.0256652832031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 359.223876953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 358.9290771484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 360.499267578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 359.25787353515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 358.8726806640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 359.58905029296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 358.978759765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 358.9934997558594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 358.95916748046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 358.9834289550781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 358.9324645996094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 358.821044921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 359.8681335449219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 359.7086181640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 358.8110656738281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 359.84259033203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 358.8752136230469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 358.7935791015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 358.97265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 359.01751708984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 359.7135314941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 358.84625244140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 359.0952453613281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 359.1736755371094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 358.8480224609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 359.15887451171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 358.96539306640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 359.3839111328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 358.989013671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 358.8538513183594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 359.42449951171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 358.7292175292969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 359.2124328613281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 358.9552917480469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 359.7422790527344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 361.4842834472656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 359.7235412597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 359.8021545410156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 358.73211669921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 358.8681945800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 358.686279296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 360.4805603027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 359.8441467285156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.70880126953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 359.8370666503906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 358.9710693359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 359.3321228027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 359.0442810058594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 359.2413024902344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 359.02655029296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 359.0611572265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 359.28851318359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 359.2291259765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 358.7789306640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 358.9715881347656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 358.9420166015625
############ Running episode number: 480  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.7761535644531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 359.00128173828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 358.7796630859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 359.1673889160156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 358.8919677734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 359.26300048828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 359.3604431152344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 359.8587951660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 358.98040771484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 358.8532409667969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 359.30743408203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 360.0783996582031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 358.83575439453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 359.1361083984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 359.62701416015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 358.96826171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 359.0152587890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 359.1722412109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 358.96514892578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 359.0111083984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 358.8497009277344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 359.3835144042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 360.33709716796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 359.40966796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 359.12274169921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 359.308349609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 358.8219299316406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 358.92779541015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 359.5328674316406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 360.256591796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 358.88177490234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 358.6289978027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 359.2970275878906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 359.3133850097656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 359.2117919921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 358.9674377441406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 359.98675537109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 359.8388977050781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 359.8641052246094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 359.057373046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 358.8587646484375
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 2 19.0 925.376677007 (10.655373370049301, 11)
loss 358.83721923828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 360.2486877441406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 359.4676513671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 359.0205993652344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 358.7272644042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 358.7712707519531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 358.9806823730469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 360.251220703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 358.9237976074219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 358.8022155761719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 358.8212585449219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 359.935546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 359.0860900878906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 360.26214599609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 359.0721130371094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 358.985107421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 358.5940856933594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 358.7093200683594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 359.1466064453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 359.24359130859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 359.70458984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 358.9797058105469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 358.71380615234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 359.000732421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 358.87481689453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 359.3490905761719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 358.9057312011719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 358.9942626953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 359.8632507324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 358.9537658691406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 359.3167419433594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 358.96856689453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 359.8724670410156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 358.83453369140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 358.9368896484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 359.6914978027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 358.9646301269531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.84002685546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 359.5248718261719
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 1 20.0 1210.97093797 (15.828704162850809, 10)
loss 358.8596496582031
Current State,action,reward,Response time,Next State:  (10, 15.828704162850809) 3 19.0 1217.27964986 (15.550833128512703, 11)
loss 358.92535400390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 358.8692321777344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 359.1022033691406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 359.0239562988281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.8238525390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 358.89434814453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.8529968261719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 359.2623596191406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 358.7304992675781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 358.96759033203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 359.05633544921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 359.6253356933594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 359.04052734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 359.0279541015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 359.2019958496094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 358.74359130859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 359.27496337890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 359.9596252441406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 359.0191345214844
############ Running episode number: 481  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 359.3476867675781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 359.2181091308594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 359.0093078613281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 359.78118896484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 359.3459167480469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 358.9141845703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 359.27825927734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 359.732421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 359.2028503417969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 358.85418701171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 358.9117126464844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 358.9076232910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 359.67218017578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.8893737792969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 358.817626953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 359.0229797363281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 358.9674072265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 358.60693359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 358.85504150390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 358.98486328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 359.43499755859375
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 4 19.0 919.871906942 (10.370942817486826, 11)
loss 359.11767578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 359.32501220703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 359.3089904785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 359.1285705566406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 359.1200866699219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 359.2317199707031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 358.95599365234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 358.8087158203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 358.9090576171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 359.1246032714844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 359.92340087890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 358.8789978027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 359.1400146484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 359.1796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 359.3265380859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 358.85406494140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.9593505859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 358.99383544921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 359.8833312988281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 358.8624267578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 358.8829040527344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 358.9815673828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 359.1387023925781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 359.18682861328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 358.9843444824219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 358.8815002441406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 358.9519348144531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 358.9128112792969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 359.0085754394531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 358.982177734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 359.2795715332031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 358.90594482421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 359.00927734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 358.9921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 358.7915954589844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 359.7439880371094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 358.918212890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 358.7152099609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 359.3143310546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 358.9383239746094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 360.1911926269531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 359.2039489746094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 359.3632507324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 358.7134094238281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 359.11700439453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 358.87060546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 358.92779541015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 358.8444519042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 358.9923095703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 358.9263916015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 358.81695556640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 358.908447265625
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 1 20.0 1376.52055872 (18.668181536495972, 10)
loss 359.5594787597656
Current State,action,reward,Response time,Next State:  (10, 18.668181536495972) 3 19.0 1367.89714889 (18.375894992990247, 11)
loss 358.8705749511719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 359.8302307128906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 359.3721008300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 358.9793701171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 360.0050964355469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 358.8970031738281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 358.9429626464844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 358.99310302734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 360.44866943359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 359.324462890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 359.2950439453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 359.62890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 359.1474304199219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.8728332519531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.9137878417969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 358.8929443359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 359.0953369140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 358.93389892578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 359.5484924316406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 358.8788757324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 358.8709716796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 358.9278259277344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 358.9644470214844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 358.9962463378906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 358.7667541503906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 359.40045166015625
############ Running episode number: 482  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 359.2263488769531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 358.87249755859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 359.3003234863281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 359.6631164550781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 358.985595703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 359.34698486328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 359.79437255859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 359.62689208984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 358.97412109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 359.01690673828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 358.9182434082031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 359.0340270996094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 360.1961364746094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.83319091796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 359.2217712402344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 359.3218688964844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 358.9093322753906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 358.71563720703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 358.4635009765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 359.50531005859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 359.0997314453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 359.0020446777344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 358.9156494140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 359.1088562011719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 359.4550476074219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 360.33447265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 359.31402587890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 359.2257385253906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 359.2389831542969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 358.9701843261719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 359.1488342285156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 358.8929748535156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 359.48211669921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 358.83062744140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 359.0960693359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 359.61761474609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 359.0884094238281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.9642333984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 359.0303039550781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 360.1378479003906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 358.5224609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 358.8662414550781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 358.9205017089844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 358.9205322265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 359.1583557128906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 359.63726806640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 359.35205078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 358.7727966308594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 359.01580810546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 358.90557861328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 359.27777099609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 359.1701965332031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 358.80743408203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 358.9364318847656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 359.8283996582031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 359.4167785644531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 359.4287414550781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 359.8197937011719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 359.348388671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 358.9210205078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 359.2703857421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 359.7989196777344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 358.97088623046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 359.3905029296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 358.9916076660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 358.85980224609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 358.9342956542969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 358.9791259765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 358.9555358886719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 359.1394958496094
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 2 19.0 1392.48057747 (19.223969507401588, 11)
loss 359.4320373535156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 358.88250732421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 359.3612365722656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 359.2070617675781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 358.9847412109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 358.8625793457031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 358.8981628417969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 359.9275817871094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 359.0353088378906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 358.8219909667969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 360.14813232421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 359.37176513671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 359.03155517578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 358.8849182128906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 358.6612243652344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 359.2249450683594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 358.9962158203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 359.03070068359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 359.5345458984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 359.3403015136719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 358.7798156738281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 359.5713806152344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 359.21820068359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 359.3713073730469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 359.4457702636719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 359.2095642089844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 359.2041320800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 359.28704833984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 359.0024719238281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 360.2523498535156
############ Running episode number: 483  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 359.1778564453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 359.3602600097656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 358.85546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 358.9774169921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 359.3540344238281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 358.89874267578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 359.3307189941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 358.8645324707031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 359.2666931152344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 358.92938232421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 358.88690185546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 359.6727600097656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 358.7098083496094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 359.3697814941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 358.8009948730469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 359.41241455078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 358.83380126953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 358.90985107421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 359.23773193359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 358.90606689453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 359.10040283203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 358.86712646484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 359.3404541015625
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 1 20.0 919.104778396 (10.388469398680568, 10)
loss 359.3974304199219
Current State,action,reward,Response time,Next State:  (10, 10.388469398680568) 3 19.0 928.707336523 (10.344006106602812, 11)
loss 359.3222961425781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 358.9749755859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 358.923583984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 358.997802734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 358.9371643066406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 359.057861328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 359.0572509765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 358.9647216796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 358.9073486328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 359.0472412109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 359.3341979980469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 359.2300720214844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 358.96429443359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 359.18267822265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 359.3483581542969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 358.9930725097656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 359.1326599121094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 359.2130126953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 358.7248840332031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 359.1141662597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 358.9168701171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 359.4089050292969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 359.3379821777344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 359.4665222167969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 358.9858703613281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 359.2680969238281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 359.100830078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 359.08453369140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 359.3538818359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 359.03375244140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 359.3800354003906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 358.7009582519531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 358.96478271484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 359.1982727050781
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 4 19.0 1238.24711194 (16.871606159345866, 11)
loss 359.0607604980469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 359.0040588378906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 359.0263977050781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 359.15618896484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 358.8115234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 359.0074157714844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 358.9181213378906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 358.9442443847656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 358.9218444824219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 359.35833740234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 359.20953369140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 358.7716369628906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 358.990478515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 358.7372131347656
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 1 20.0 1385.62570908 (19.08360399753829, 10)
loss 359.1627502441406
Current State,action,reward,Response time,Next State:  (10, 19.08360399753829) 3 19.0 1389.93285614 (18.668181536495972, 11)
loss 359.0074157714844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 358.9833068847656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 358.9646301269531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 359.0042724609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 359.7663269042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 359.4036865234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 359.2030944824219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 359.0364074707031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 358.8243408203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 359.84942626953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 359.22808837890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 359.0346984863281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.9759826660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 358.96466064453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.8503112792969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 359.11407470703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 359.3670959472656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 358.8828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 358.8954162597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 358.7660217285156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 359.0718994140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 359.7685546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 359.21075439453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 359.3543701171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 359.19390869140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 359.0228576660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 358.88055419921875
############ Running episode number: 484  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 359.4021301269531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 359.3174743652344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 358.823974609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 358.87542724609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 359.333740234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 359.41162109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 359.4806213378906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 359.5667724609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 358.80126953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 358.8545227050781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 359.1955871582031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 358.8664245605469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 359.2248229980469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 359.3285827636719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 359.25738525390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 359.2308349609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 358.8699645996094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 359.122314453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 358.760986328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 359.8672790527344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 359.2523498535156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 358.80633544921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 359.3462829589844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 359.3458557128906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 359.2435302734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 358.9419860839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 359.0003356933594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 359.2328796386719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 359.22265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 359.6203308105469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 358.64971923828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 359.0022888183594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 358.985107421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 359.6724548339844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 358.9812927246094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 359.31903076171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 359.07037353515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 359.4320373535156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 359.1913757324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 359.1966247558594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 359.0592956542969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 358.7069091796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 359.05126953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 358.6482849121094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 359.2102355957031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 358.799072265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 359.3422546386719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 359.1036376953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 358.88037109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 359.455322265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 358.9104919433594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 358.9315185546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 358.8568420410156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 358.84857177734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 358.8320617675781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 359.36151123046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 358.8470458984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 358.7461853027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 358.7303466796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 358.9349365234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 359.05560302734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 359.0728759765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 358.8945617675781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 358.99468994140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 358.9620361328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 359.2789001464844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 359.1633605957031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 359.05316162109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 359.6796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 360.0409851074219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 358.6634521484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 359.1080322265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 358.87713623046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 359.0425720214844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 358.9294128417969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 358.9776611328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 358.8078918457031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 359.0261535644531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.8658142089844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 358.69427490234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 359.0611572265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 358.8296203613281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 359.0745544433594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 359.4673767089844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 359.0946044921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 359.03924560546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 359.547119140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.9450988769531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 359.24261474609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 359.22601318359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 358.8165283203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 359.61712646484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 359.3753356933594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 358.7730712890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 358.85113525390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 359.0025634765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 358.7604675292969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 359.8621826171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 358.74041748046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 359.3764953613281
############ Running episode number: 485  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 359.3618469238281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 359.6888427734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 358.80157470703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 359.22320556640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 359.244384765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 358.893310546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 359.281494140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 359.23333740234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 359.3731384277344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 359.0794372558594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 358.8428039550781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 358.9870910644531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 359.02960205078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 359.3678894042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 359.2558288574219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 358.9678955078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 359.0701904296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 359.3459167480469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 358.68023681640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 359.26611328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 359.41815185546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 358.992919921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 359.0833435058594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 359.0657043457031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 358.7871398925781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 358.9884948730469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 359.2556457519531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 359.41436767578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 359.1124267578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 359.086669921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 358.757080078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 358.7834167480469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 358.9132080078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 358.9967041015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 359.3697204589844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 359.2934875488281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 358.6905822753906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.8819580078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 359.9360046386719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 358.8140563964844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 358.9583740234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 358.7052001953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 358.92919921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 360.10626220703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 359.0865783691406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 358.8213195800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 358.9866943359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 359.4532775878906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 359.2037353515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 359.8488464355469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 359.0301208496094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 358.7962341308594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 359.0321350097656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 359.6560974121094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 358.9297180175781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 358.7958984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 358.9735107421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 358.9306335449219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 359.0874328613281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 359.3875732421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 359.3494567871094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 359.37451171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 359.060546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 358.77569580078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 358.87445068359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 358.9218444824219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 358.9998474121094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 358.95562744140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 359.24835205078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 358.7531433105469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 360.1031799316406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 359.2758483886719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 358.9403991699219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 359.1015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 358.971923828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 359.34332275390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 358.7735595703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 358.947021484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 359.35107421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 358.80731201171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 359.2488098144531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 359.4246520996094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 359.1752014160156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 359.3629455566406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 358.93450927734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 359.538330078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 359.2275695800781
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 2 19.0 1207.88915169 (15.954793861767499, 11)
loss 359.8514099121094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 359.09564208984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 359.4721374511719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 358.9545593261719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 359.8304138183594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 358.8990783691406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 359.48321533203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 359.13336181640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 359.761962890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 359.470947265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 359.45526123046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 359.13043212890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 359.0284729003906
############ Running episode number: 486  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.98321533203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 359.17303466796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 358.9054260253906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 359.152587890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 359.241455078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 358.87066650390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 359.034912109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 358.9110107421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 359.1188659667969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 359.0931396484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 359.3990478515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 359.2455139160156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 359.3783264160156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.8487243652344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 359.0447082519531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 359.38555908203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 359.0191345214844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 359.7070617675781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 358.99005126953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 359.1948547363281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 358.9815368652344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 359.40631103515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 358.7605895996094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 359.04388427734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 359.0873718261719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 359.07177734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 358.8575134277344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 358.81463623046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 358.9569091796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 358.7030944824219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 359.42181396484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 359.3206787109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 358.7808532714844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 358.82537841796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 358.89404296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 358.41619873046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 358.8580627441406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 359.36163330078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 359.2886047363281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 358.7351989746094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 359.39404296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 359.28985595703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 359.34527587890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 358.711181640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 359.0893859863281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 359.07904052734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 358.859619140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 358.9160461425781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 358.6979064941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 358.85870361328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 358.8720703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 359.0208435058594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 358.67205810546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 358.7283935546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 358.8577575683594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 359.7429504394531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 359.2959899902344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 358.8053894042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 358.74237060546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 358.83392333984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 359.6189880371094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 358.9502868652344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 358.8753967285156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 359.00250244140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 359.39044189453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 358.9004821777344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 359.172607421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 358.8924560546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 359.2291564941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 358.70904541015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 359.2825622558594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 359.0294189453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 358.9005126953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 359.1276550292969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 359.0135192871094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 358.83447265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 359.10186767578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 358.92950439453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 359.2428894042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 358.837646484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 359.228271484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 359.22772216796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 359.0373229980469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 359.111083984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 359.6523742675781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 359.02691650390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 359.1800842285156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 359.05413818359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 359.0035705566406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 358.87713623046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 358.7779235839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 358.924560546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 359.34173583984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 359.2173156738281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 359.3724670410156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 358.7991943359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 359.4250183105469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 358.7738342285156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 359.09893798828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 359.0408020019531
############ Running episode number: 487  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.9735107421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 359.1255798339844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 358.48248291015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 358.83349609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 358.887939453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 359.10406494140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 358.6365051269531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 359.0474853515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 358.9538879394531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 359.0884704589844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 358.9909362792969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 358.8731689453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 358.84185791015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.78924560546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 359.19744873046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 359.45587158203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 358.73907470703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 358.9460754394531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 359.4337463378906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 359.2288513183594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 359.8608703613281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 358.9308166503906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 358.970703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 358.6802673339844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 359.081787109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 359.1070556640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 359.1258850097656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 359.1553649902344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 358.9099426269531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 358.86627197265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 359.0412902832031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 359.177001953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 358.9873046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 358.9211730957031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 359.6213684082031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 358.9595642089844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 359.38714599609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.8941955566406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 358.8472595214844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 358.95318603515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 359.2496032714844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 359.00439453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 358.5760803222656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 358.8204650878906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 359.87396240234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 358.86578369140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 359.387939453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 359.0522766113281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 358.898193359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 359.4209899902344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 358.8977966308594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 358.9505310058594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 359.2789611816406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 358.7057800292969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 358.74774169921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 359.2508239746094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 359.1332092285156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 359.5093078613281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 358.9941711425781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 359.6273193359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 358.8913269042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 358.8731384277344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 358.97369384765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 358.8376159667969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 359.0482482910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 358.798828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 358.7625427246094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 359.36920166015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 358.9358215332031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 358.9009704589844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 359.3910827636719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 358.8996276855469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 358.6968994140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 359.1341247558594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 359.2958984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 358.81756591796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 358.96533203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 359.0560302734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 359.5730285644531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 358.73223876953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 358.9111633300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 359.35784912109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 358.86572265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 358.9249267578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 359.095703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.964599609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 358.9128723144531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.8574523925781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 359.0394592285156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 358.91314697265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 359.0931396484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 358.8719787597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 359.430908203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 358.9470520019531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 358.9422912597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 359.85491943359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 359.46771240234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 359.6665954589844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 358.8224182128906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 358.8286437988281
############ Running episode number: 488  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 359.3349914550781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 359.4241943359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 358.9794921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 358.8289489746094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 359.22125244140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 358.90618896484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 358.8970947265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 358.71649169921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 359.213134765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 358.8843994140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 358.8842468261719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 359.7390441894531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 359.3636474609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 359.2196350097656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 358.8498229980469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 358.673095703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 359.39068603515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 358.6449890136719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 358.97607421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 359.0282897949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 359.6691589355469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 358.717041015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 358.99578857421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 358.9796142578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 358.84234619140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 358.9313659667969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 360.2203369140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 359.1878356933594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 358.9551086425781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 358.8472900390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 358.7610778808594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 359.16815185546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 358.96221923828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 359.2111511230469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 358.9065246582031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 359.12298583984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 359.0003356933594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.7276611328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 359.076171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 358.8258056640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 359.30328369140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 359.00201416015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 358.9357604980469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 359.093505859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 359.367919921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 359.4043884277344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 358.894287109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 358.87066650390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 359.3392028808594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 358.9826965332031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 359.2319641113281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 359.06719970703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 358.79473876953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 358.7984924316406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 359.2332763671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 358.9552001953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 358.77764892578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 359.3172912597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 359.077880859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 359.6982116699219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 359.0279235839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 359.549072265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 358.8197021484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 359.75775146484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 358.89495849609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 359.1879577636719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 359.1100158691406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 359.3633117675781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 358.9306335449219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 359.15875244140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 358.7335205078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 359.33331298828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 359.4030456542969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 359.292724609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 359.6837158203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 358.96148681640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 359.01971435546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 359.23577880859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.81036376953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 358.9332275390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 358.90216064453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 358.8940124511719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 358.835693359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 358.86456298828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 359.6380310058594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 359.0582580566406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 358.9444885253906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 359.1591796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.88671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 359.3473205566406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 358.9112548828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 359.5085754394531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 359.23681640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 358.9849853515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 358.81646728515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 358.9686279296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 358.84405517578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 358.89129638671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 359.0385437011719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 358.9540710449219
############ Running episode number: 489  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.93157958984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 359.0038146972656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 358.8082580566406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 358.8397216796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 358.98394775390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 359.322998046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 358.69708251953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 359.248291015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 359.03131103515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 359.2123107910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 359.04071044921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 359.06597900390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 358.9525451660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 359.29119873046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 358.8740539550781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 358.833251953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 359.2954406738281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 359.4255065917969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 359.0665283203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 358.8800354003906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 358.86541748046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 358.9183654785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 358.98541259765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 358.9832763671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 359.21636962890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 359.50592041015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 358.9996643066406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 359.0787658691406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 359.3599853515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 359.09564208984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 358.9228210449219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 359.0328674316406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 358.76568603515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 359.3239440917969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 359.1978759765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 358.9350280761719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 360.0191650390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 359.0378112792969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 358.8894958496094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 358.94580078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 359.1954650878906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 359.2174377441406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 358.9770202636719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 358.9524841308594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 358.6791687011719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 358.98199462890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 359.2299499511719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 358.7276611328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 358.79998779296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 359.4606628417969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 359.454345703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 358.9337158203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 359.0
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 358.8656311035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 359.57916259765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 359.10260009765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 358.9564208984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 358.9732971191406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 358.8252258300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 358.9378662109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 359.0520324707031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 358.778076171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 358.6124267578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 359.1029052734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 358.8360290527344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 359.025146484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 359.0328674316406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 359.46136474609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 359.5242919921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 359.0478515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 358.7201232910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 358.8406677246094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 359.32843017578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 359.3771667480469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 359.8122253417969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 358.7139587402344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 358.96612548828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 358.9599609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 359.36383056640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 358.767822265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 359.39697265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 359.052734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 358.7716064453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 359.23828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 359.4004821777344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.7363586425781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 358.98345947265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.90423583984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.8547668457031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 358.53314208984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 359.4076843261719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 358.72821044921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 358.9084167480469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 359.1963195800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 358.8822326660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 359.3591003417969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 358.8948059082031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 358.8625183105469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 359.3971862792969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 359.3171081542969
############ Running episode number: 490  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 359.26361083984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 358.7728576660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 359.2624816894531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 358.9723815917969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 359.0610046386719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 358.84063720703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 358.8275146484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 359.14068603515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 359.48309326171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 359.0959777832031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 359.7430725097656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 358.8304748535156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 358.97344970703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.93719482421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 358.9369201660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 359.3243103027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 359.020751953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 358.98272705078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 358.8533020019531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 358.8871765136719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 359.066162109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 358.9696960449219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 358.8766174316406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 359.038818359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 359.3837585449219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 358.8436279296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 359.2000427246094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 358.785888671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 359.68499755859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 359.1819763183594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 359.0157470703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 359.354248046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 359.6218566894531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 359.232666015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 358.9215393066406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 359.18963623046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 358.8326110839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.8406677246094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 359.034912109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 358.9189453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 358.8262939453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 358.8133239746094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 358.9842529296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 358.76885986328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 358.9952697753906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 358.9285583496094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 359.00323486328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 358.9179992675781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 358.99420166015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 359.4270935058594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 358.9551696777344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 358.9473571777344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 358.9549560546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 358.8427429199219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 358.8126220703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 359.4364318847656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 359.0544128417969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 359.0393371582031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 358.9914245605469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 358.9322204589844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 359.3794860839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 359.4212951660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 358.91046142578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 358.8238830566406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 358.8752136230469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 358.91070556640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 358.903076171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 358.7765197753906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 359.4305419921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 358.7873840332031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 359.57122802734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 358.93524169921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 358.8518371582031
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 1 20.0 1376.52055872 (18.668181536495972, 10)
loss 359.88909912109375
Current State,action,reward,Response time,Next State:  (10, 18.668181536495972) 3 19.0 1367.89714889 (18.375894992990247, 11)
loss 358.86328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 358.73052978515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 358.9282531738281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 359.03326416015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 359.1248779296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 358.8592834472656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 359.11676025390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 359.469970703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 359.3721008300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 358.9443664550781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 358.69342041015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 359.4183654785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 358.9317626953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.9822692871094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.9980773925781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 359.2603454589844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 359.2914123535156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 358.8335266113281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 358.63739013671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 359.53802490234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 358.8606262207031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 359.32781982421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 359.5066223144531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 358.75518798828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 359.0343017578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 359.5060729980469
############ Running episode number: 491  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 359.23724365234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 358.5538635253906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 359.3351135253906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 358.9495849609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 359.3170471191406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 358.9410095214844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 358.71649169921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 358.9045715332031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 358.8315734863281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 358.7848205566406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 358.944091796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 359.01947021484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 359.5226135253906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.7812194824219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 359.02923583984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 358.67364501953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 358.7750244140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 358.8239440917969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 358.8907470703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 358.7978820800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 359.162353515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 359.1186828613281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 359.3566589355469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 358.96588134765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 358.9657287597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 358.9462585449219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 358.7407531738281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 359.3517761230469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 359.45880126953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 359.3800048828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 359.0190124511719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 359.1125793457031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 359.6033020019531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 358.7417297363281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 358.7222900390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 359.08917236328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 359.42486572265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 359.05084228515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 359.0130310058594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 359.01434326171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 359.0134582519531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 359.2192077636719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 359.200927734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 358.78326416015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 359.08001708984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 358.8426513671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 359.0757141113281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 359.3674011230469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 359.4649658203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 359.2124938964844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 358.7972717285156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 358.6563720703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 358.7981872558594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 358.778564453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 359.0678405761719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 358.7638854980469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 359.1419677734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 358.97235107421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 359.4034423828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 359.32025146484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 359.15533447265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 359.0498352050781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 358.8975830078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 358.92840576171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 358.6920166015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 359.19500732421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 358.77996826171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 358.9992370605469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 358.8960876464844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 359.03851318359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 359.0807189941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 359.8692321777344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 359.2083740234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 359.8013610839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 359.23321533203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 359.5835876464844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 359.10992431640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 358.8060607910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 359.015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 359.41265869140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 359.05621337890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 358.79437255859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 359.28912353515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 359.1054382324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 358.79241943359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 359.1689453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 358.8912048339844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 359.31109619140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.87176513671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 359.3776550292969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 358.9796447753906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 358.8229675292969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 358.9644775390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 359.0440673828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 358.8100891113281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 358.9754943847656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 359.120361328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 358.8049621582031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 358.902587890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 358.89013671875
############ Running episode number: 492  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 359.4186096191406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 358.8879089355469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 359.18609619140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 359.0367431640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 358.9964904785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 359.24420166015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 358.904296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 358.88482666015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 358.99664306640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 358.9922180175781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 359.0824279785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 359.0419616699219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 358.8672790527344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 359.1551818847656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 358.6805419921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 358.8464050292969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 359.0698547363281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 359.43292236328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 359.33660888671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 358.82159423828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 359.3361511230469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 359.4689025878906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 358.90142822265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 359.03753662109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 358.9461669921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 359.03192138671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 359.0115661621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 358.9256591796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 359.46014404296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 358.96795654296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 359.15643310546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 359.4460144042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 359.0661315917969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 358.83416748046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 358.7427062988281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 359.4964599609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 358.9367370605469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.8701171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 358.689208984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 358.98431396484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 358.9763488769531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 359.0647888183594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 358.9139099121094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 358.8529357910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 359.16741943359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 359.3920593261719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 359.018798828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 358.93695068359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 359.21356201171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 359.1316833496094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 358.9356994628906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 358.8624267578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 358.94970703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 359.0077209472656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 359.21697998046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 358.80029296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 359.6376647949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 358.9908142089844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 358.8798522949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 359.0047607421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 359.2397155761719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 359.71826171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 359.2720947265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 358.7940368652344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 359.0522155761719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 358.95501708984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 359.47528076171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 358.8918762207031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 359.1982116699219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 359.6197509765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 358.8966369628906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 359.88427734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 358.7917175292969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 358.9417724609375
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 2 19.0 1354.56874896 (18.375894992990247, 11)
loss 359.34674072265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 358.87115478515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 358.8387451171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 358.9737548828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.7042236328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 359.1324157714844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 359.3983154296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 359.1342468261719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 359.039794921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 359.423583984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 359.2684631347656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.8261413574219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 359.12213134765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.9521179199219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.9913635253906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 358.92547607421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 359.1045227050781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 358.83575439453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 359.3345947265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 359.4038391113281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 359.48956298828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 358.59246826171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 359.313232421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 358.8790283203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 358.8759765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 359.30914306640625
############ Running episode number: 493  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.9592590332031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 358.7346496582031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 359.0774841308594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 359.0109558105469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 359.2938537597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 358.94384765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 358.7312316894531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 359.27490234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 359.0874938964844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 359.19622802734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 359.47735595703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 358.9085693359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 358.92486572265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.89739990234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 358.9486999511719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 359.2813415527344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 358.9120788574219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 358.916015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 359.4972839355469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 359.2347106933594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 359.4991455078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 359.5255126953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 359.5304260253906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 358.6861267089844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 358.8401794433594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 358.8464660644531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 358.9947204589844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 359.1104736328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 359.0240478515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 359.073974609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 358.7389831542969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 358.8301086425781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 359.13812255859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 359.2887268066406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 359.24853515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 358.7511291503906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 359.1365966796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 359.1865539550781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 359.195556640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 358.80267333984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 359.4226379394531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 358.74627685546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 358.97564697265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 358.9784240722656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 358.9985656738281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 359.01177978515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 358.6314697265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 358.9503479003906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 359.55438232421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 358.8140563964844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 358.9749755859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 359.3466796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 358.76861572265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 359.0365295410156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 358.6575927734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 358.9703063964844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 358.8623352050781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 359.3952941894531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 358.68145751953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 358.9517822265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 359.4345703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 358.94757080078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 358.6557922363281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 359.11224365234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 359.4757080078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 359.0712585449219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 358.7960510253906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 359.2183532714844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 359.09521484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 359.0419616699219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 358.8763122558594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 359.000244140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 358.8503112792969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 358.7841491699219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 359.1775207519531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 359.1103820800781
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 1 20.0 1310.13203606 (17.229782241685768, 10)
loss 359.4739990234375
Current State,action,reward,Response time,Next State:  (10, 17.229782241685768) 3 19.0 1291.59856437 (16.84211602880065, 11)
loss 358.9638977050781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.9918212890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 358.9892883300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 358.78839111328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 358.97967529296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 358.7743835449219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 359.0252990722656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 359.0317687988281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.82366943359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 358.78765869140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.926513671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.7815246582031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 358.7704162597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 359.4620666503906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 358.84820556640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 358.889892578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 358.9421691894531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 359.0735778808594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 358.834228515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 359.3984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 358.8188781738281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 358.8493347167969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 358.8135986328125
############ Running episode number: 494  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 359.076171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 359.39404296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 358.8390197753906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 359.0173034667969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 358.9775390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 359.01025390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 359.029296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 358.92828369140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 359.2618713378906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 358.79345703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 358.96832275390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 358.9612121582031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 359.4013366699219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.826171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 359.2446594238281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 358.93756103515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 359.55718994140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 359.06488037109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 359.29949951171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 359.1104736328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 359.02935791015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 358.9540710449219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 358.9047546386719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 358.7384033203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 359.1059265136719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 359.07318115234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 358.8011169433594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 358.9192810058594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 358.6952209472656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 358.9975891113281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 358.9395751953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 358.9663391113281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 359.04351806640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 358.7097473144531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 359.0435791015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 358.86663818359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 358.81695556640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 359.2882385253906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 359.0144958496094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 359.3997497558594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 359.2909240722656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 358.86590576171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 358.92431640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 359.57177734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 358.7918701171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 359.09326171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 359.5415344238281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 358.9874267578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 359.0616149902344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 358.8475036621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 358.7598571777344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 358.9242248535156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 359.18731689453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 359.0605773925781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 358.9293212890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 358.9747619628906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 358.884033203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 358.7764587402344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 359.4096984863281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 358.8175964355469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 359.432373046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 359.3437194824219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 358.7538757324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 358.9259033203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 359.436767578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 358.9624328613281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 358.596435546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 359.42987060546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 359.1244201660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 358.7884826660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 359.47412109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 358.87591552734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 359.0167236328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 358.960205078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 358.9006042480469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 359.01898193359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 358.8501892089844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 358.7278137207031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.9436340332031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 358.8847961425781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 358.9447326660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 358.8519592285156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 359.29864501953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 358.80621337890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 359.0140075683594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 359.44244384765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 358.9281921386719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.8179016113281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.89569091796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 358.8144226074219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 359.1341247558594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 359.1894226074219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 358.8631591796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 359.0296936035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 359.3651428222656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 359.3957214355469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 358.8165283203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 358.81964111328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 359.55474853515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 358.86767578125
############ Running episode number: 495  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 359.26898193359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 358.9348449707031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 358.9028625488281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 358.8943786621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 359.01898193359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 359.4567565917969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 358.86920166015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 359.2094421386719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 359.2776184082031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 358.9714660644531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 359.7086181640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 359.2247009277344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 358.7839660644531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.8565673828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 358.8748474121094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 358.9486999511719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 359.02362060546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 359.35980224609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 358.7102355957031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 359.2967834472656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 359.0118103027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 359.1282043457031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 359.52667236328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 359.1334533691406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 358.8711242675781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 359.341064453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 359.0074462890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 359.27886962890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 358.82293701171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 359.01708984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 359.3631591796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 358.8092041015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 359.3394470214844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 359.2646789550781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 358.8240966796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 358.7283935546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 359.20635986328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.8822021484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 358.8489685058594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 359.0801086425781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 358.8616638183594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 359.2978210449219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 358.6802673339844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 359.3898620605469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 358.7947692871094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 358.87103271484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 358.93060302734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 359.0657043457031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 359.22821044921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 358.8847351074219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 358.9146728515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 358.9570007324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 359.0125427246094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 359.0287170410156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 358.9394836425781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 359.0179443359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 358.9853820800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 359.0328063964844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 359.1056823730469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 359.0093994140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 359.0129699707031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 359.0621032714844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 359.0859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 358.7710266113281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 358.9627380371094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 358.9934997558594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 358.9915466308594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 359.5911865234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 358.99847412109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 358.88623046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 359.0216979980469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 359.7927551269531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 359.38812255859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 359.0108947753906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 358.96649169921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 358.89306640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 359.0018310546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 359.2478942871094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.8725891113281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 358.8429260253906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 358.9993896484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 359.1559143066406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 359.0057373046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 358.917724609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 358.9149169921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 359.01019287109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 358.71173095703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 359.3146667480469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 359.19732666015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 359.40460205078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 359.09893798828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 359.4831848144531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 358.8875427246094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 359.114013671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 359.012939453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 359.7062683105469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 358.7100830078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 359.1735534667969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 358.7774658203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 359.0223693847656
############ Running episode number: 496  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 359.2088623046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 358.86822509765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 359.3216247558594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 359.08392333984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 359.0513916015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 358.9950256347656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 358.77349853515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 358.9513244628906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 359.9075622558594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 359.1584167480469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 358.9422607421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 358.9391784667969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 359.4517822265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.8744201660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 358.7799377441406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 358.7969970703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 359.1277770996094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 358.8017883300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 359.0196228027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 358.7577819824219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 359.0081787109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 358.9767150878906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 359.1557922363281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 359.060546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 359.177490234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 358.99761962890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 358.9310607910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 358.956787109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 359.1720886230469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 358.9970703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 358.89892578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 359.07757568359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 358.82952880859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 358.76806640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 359.0038146972656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 359.09857177734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 359.63427734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.9351806640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 358.81298828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 358.8720703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 359.0078430175781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 359.02435302734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 359.026123046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 359.455810546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 358.66827392578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 358.7631530761719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 359.28558349609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 358.80535888671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 359.1047668457031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 359.8716125488281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 358.8873596191406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 358.856201171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 358.9249267578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 359.0347595214844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 358.9144287109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 359.1727294921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 359.0951232910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 358.8653869628906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 359.5662841796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 358.7783203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 358.8700256347656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 358.7287292480469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 359.548828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 359.187255859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 358.9245910644531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 358.94580078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 359.3056640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 359.4544677734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 358.9245300292969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 359.393798828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 359.1783752441406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 359.24835205078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 358.96844482421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 359.0274658203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 359.1227111816406
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 2 19.0 1339.12370397 (17.82724819986867, 11)
loss 359.4854431152344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 358.734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 358.7674255371094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 359.3514404296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 359.037353515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 359.0446472167969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 359.42279052734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 358.8845520019531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 359.01458740234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 358.9700927734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.947265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 358.9320373535156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 359.1252746582031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.9332275390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 358.924560546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 359.0008239746094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 359.52484130859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 358.91888427734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 358.94390869140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 359.04132080078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 358.7231140136719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 359.01141357421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 359.279541015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 358.8132019042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 359.05242919921875
############ Running episode number: 497  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.9801330566406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 358.96051025390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 359.0784606933594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 359.23614501953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 359.192626953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 358.7812194824219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 359.0547790527344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 358.8850402832031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 358.9324645996094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 359.0262451171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 358.7259521484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 358.9715576171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 359.2654113769531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 359.45562744140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 358.6212158203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 358.825927734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 358.8023986816406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 358.923095703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 358.9988098144531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 358.95892333984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 358.9029541015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 358.8688659667969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 359.0896911621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 359.0696105957031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 358.7958068847656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 359.0393981933594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 359.0341491699219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 358.80194091796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 358.8321533203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 358.7289733886719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 359.12628173828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 358.9737854003906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 358.9195861816406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 358.8468322753906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 358.9914855957031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 358.94573974609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 358.68670654296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.84295654296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 358.804931640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 358.9563903808594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 358.92828369140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 358.844482421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 358.8421630859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 359.08807373046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 358.96820068359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 358.90594482421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 358.7674865722656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 358.8913269042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 359.2689208984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 358.8701477050781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 358.96063232421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 359.1483459472656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 359.1451721191406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 358.8547058105469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 359.3378601074219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 358.9666442871094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 359.0268249511719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 358.956298828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 358.68865966796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 358.92242431640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 358.9784851074219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 358.7863464355469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 358.8588562011719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 358.8859558105469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 358.99554443359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 358.9757995605469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 358.7334899902344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 358.8200378417969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 358.8237609863281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 358.8874816894531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 359.1338195800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 359.14031982421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 358.7344665527344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 358.9497985839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 359.1601257324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 359.18804931640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 359.2092590332031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 358.898193359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 359.1355895996094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 358.92034912109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 358.9414367675781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 358.6399841308594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 358.86651611328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 359.00750732421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 358.9312438964844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.93365478515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 359.1528015136719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.9899597167969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.9117431640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 358.9758605957031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 358.9563293457031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 359.0407409667969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 358.97662353515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 359.6243591308594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 359.07061767578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 359.0350341796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 359.0041809082031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 359.2663879394531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 358.9610900878906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 359.0612487792969
############ Running episode number: 498  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 359.2382507324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 358.97674560546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 359.4107971191406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 359.13787841796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 359.0304870605469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 359.4525451660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 358.9870910644531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 359.14288330078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 359.3876037597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 358.9978942871094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 358.9179992675781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 358.8015441894531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 359.03997802734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.89996337890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 358.7360534667969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 359.1257629394531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 359.2351379394531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 358.900390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 358.66839599609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 358.7775573730469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 359.0957336425781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 359.36126708984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 358.83343505859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 359.3382568359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 359.2313232421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 358.9185485839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 359.0100402832031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 358.8739318847656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 358.86376953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 358.7949523925781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 358.8307189941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 359.37554931640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 359.12652587890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 358.89532470703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 359.10345458984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 358.8794250488281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 358.9312744140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 359.010009765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 358.85772705078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 359.3959655761719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 359.476318359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 359.3351135253906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 358.7785949707031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 358.92755126953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 359.48199462890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 359.05938720703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 358.7880859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 359.03753662109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 358.9385070800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 359.0192565917969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 358.9480285644531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 359.5105895996094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 358.820556640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 358.7928466796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 358.9739074707031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 358.7714538574219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 358.730224609375
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 1 20.0 1204.9600972 (16.466876895473597, 10)
loss 359.0456848144531
Current State,action,reward,Response time,Next State:  (10, 16.466876895473597) 3 19.0 1251.130943 (16.871606159345866, 11)
loss 359.1826171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 358.83465576171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 359.3306884765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 359.2886047363281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 359.0008850097656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 359.21649169921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 358.9602355957031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 359.02984619140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 358.8753967285156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 358.9898376464844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 359.34783935546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 359.0426330566406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 359.20672607421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 358.8929748535156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 359.1555480957031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 359.0231628417969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 359.50860595703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 359.2866516113281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 358.94781494140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 358.9812927246094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 359.4016418457031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 358.9669189453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 358.82427978515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 358.7301330566406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 358.9759216308594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 359.10247802734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 358.8418273925781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.9164733886719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 358.7773742675781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 359.4813232421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 359.1231384277344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 358.92547607421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 359.1636962890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 359.7736511230469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 358.9808349609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 359.26824951171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 359.00714111328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 358.96044921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 358.9061279296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 358.7602233886719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 358.8058776855469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 358.8074645996094
############ Running episode number: 499  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 359.4044189453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 358.90289306640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 358.8279724121094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 358.7471008300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 358.8426818847656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 358.8989562988281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 359.0606994628906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 359.13861083984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 359.0467224121094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 359.3811340332031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 358.79327392578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 358.83453369140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 358.8923034667969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.77374267578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 358.94635009765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 359.31878662109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 358.9539489746094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 359.2728271484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 359.19854736328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 359.0668029785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 358.94720458984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 358.7948913574219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 358.808837890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 358.8534851074219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 359.0086364746094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 359.3982849121094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 358.8688659667969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 358.90655517578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 359.10833740234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 358.7083740234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 359.24432373046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 359.0768737792969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 358.87420654296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 359.3061218261719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 359.4850769042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 358.85760498046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 358.7798156738281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 359.2097473144531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 358.7603759765625
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 1 20.0 917.140709305 (10.425974763084863, 10)
loss 358.64031982421875
Current State,action,reward,Response time,Next State:  (10, 10.425974763084863) 3 19.0 930.696774523 (10.546025383098053, 11)
loss 358.99615478515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 358.9680480957031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 358.67828369140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 358.6305236816406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 358.99334716796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 359.0640869140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 358.9460754394531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 358.8796081542969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 359.38958740234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 359.3227233886719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 358.8606872558594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 359.14886474609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 359.2074890136719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 358.9400329589844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 358.7291564941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 358.9149475097656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 359.16937255859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 358.7230224609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 359.4822692871094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 358.7930603027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 359.0729064941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 358.9478454589844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 358.6784973144531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 359.00018310546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 359.0028991699219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 359.3805236816406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 359.28521728515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 359.0770263671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 359.42071533203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 358.8686218261719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 359.5898742675781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 358.93402099609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 358.8522033691406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 358.89874267578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 358.8805236816406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 359.2781066894531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 358.9624328613281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 359.6888122558594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.999267578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 358.76251220703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 359.18145751953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 359.1711730957031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 359.0715637207031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 359.1534423828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 359.27081298828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 359.33917236328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 358.99609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 359.5037841796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 359.43963623046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 358.89422607421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 358.9230651855469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 359.01580810546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 359.142333984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 359.0091552734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 359.3721618652344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 358.89703369140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 359.0596618652344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 358.85797119140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 358.72894287109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 359.0252380371094
############ Running episode number: 500  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 359.05157470703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 358.90478515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 359.5923156738281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 358.932373046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 359.1662902832031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 359.3045654296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 359.4374694824219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 358.83868408203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 358.9288330078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 358.6953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 359.3597412109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 358.9593811035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 358.90045166015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.7828674316406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 358.8836364746094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 359.6646728515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 359.0838317871094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 359.4392395019531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 359.0770263671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 359.42327880859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 359.4630432128906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 358.862548828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 358.8143615722656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 359.3310546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 358.9167785644531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 359.901611328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 359.27117919921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 359.0757751464844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 358.79290771484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 359.4380798339844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 358.9641418457031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 359.4598693847656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 359.3638916015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 358.87255859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 359.05181884765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 358.7247314453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 359.2207336425781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.8409729003906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 358.7874450683594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 358.8391418457031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 358.8350524902344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 359.7543640136719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 358.93267822265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 358.8318176269531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 358.7625732421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 358.71405029296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 359.0376892089844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 358.6913757324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 358.4190979003906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 358.92376708984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 359.2118835449219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 359.1622009277344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 359.0946960449219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 359.1239013671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 359.2857971191406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 358.952880859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 358.97589111328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 359.2210998535156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 358.7475280761719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 358.80718994140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 359.00799560546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 359.0475769042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 359.0006103515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 359.2851867675781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 358.952880859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 358.8652648925781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 358.9974670410156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 358.8876953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 359.20697021484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 359.0208435058594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 358.8536376953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 359.0276794433594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 359.01800537109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 359.1387634277344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 359.8943176269531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 358.8788757324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 358.9347839355469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 358.9346923828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.8280029296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 359.32232666015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 359.0929870605469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 359.12664794921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 358.983642578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 359.2123718261719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 358.997314453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 359.42987060546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 359.0074462890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 359.0204772949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.99969482421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 359.1173095703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 359.0919189453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 358.9546203613281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 358.7965087890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 359.0738525390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 358.7599182128906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 358.9821472167969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 358.8098449707031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 358.7740783691406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 358.995361328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 359.3724060058594
############ Running episode number: 501  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 359.10211181640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 359.3657531738281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 359.2641296386719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 359.273193359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 359.3077392578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 358.85113525390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 358.9030456542969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 358.99365234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 358.9042053222656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 359.2593688964844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 358.83856201171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 359.0817565917969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 358.980224609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.9197998046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 359.6329040527344
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 1 20.0 927.560809977 (10.552868829802469, 10)
loss 358.7593078613281
Current State,action,reward,Response time,Next State:  (10, 10.552868829802469) 3 19.0 937.427755072 (10.553846649940214, 11)
loss 359.1014404296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 358.7856750488281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 359.3930358886719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 359.0228576660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 359.76324462890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 359.726806640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 358.8282775878906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 359.1101989746094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 359.0536804199219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 359.0782470703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 359.89361572265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 358.9935302734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 359.0382385253906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 359.4112243652344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 358.74212646484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 359.164794921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 358.8002014160156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 359.1319274902344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 359.3582458496094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 359.119140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 359.3529052734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.916748046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 359.2882995605469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 359.5520935058594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 359.0241394042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 359.525390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 358.8110656738281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 359.3544006347656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 359.05242919921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 359.0645446777344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 358.78375244140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 359.16802978515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 359.1806640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 358.86962890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 359.20892333984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 358.8584899902344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 359.26654052734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 358.98583984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 359.3064270019531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 358.89361572265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 358.80645751953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 358.8907470703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 359.062744140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 359.4379577636719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 360.0645446777344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 358.8168029785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 359.4604187011719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 359.2010192871094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 358.75146484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 359.2291259765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 359.08734130859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 358.7391357421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 359.2554016113281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 359.01220703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 359.1861572265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 358.9530029296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 358.9000549316406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 358.9416198730469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 359.016357421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 358.8321228027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 358.87567138671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 358.8150329589844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 359.368896484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 358.8363342285156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 359.1610412597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 358.9444885253906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 358.8990783691406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 359.00848388671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 359.3166809082031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.69451904296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 359.0968322753906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 359.0779113769531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 359.691650390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 358.794189453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 359.4385681152344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 359.08251953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 359.8512268066406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 358.8551940917969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 358.86724853515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 359.0492248535156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 359.3585205078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 359.471923828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 358.8018493652344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 359.4233703613281
############ Running episode number: 502  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.92401123046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 359.3393859863281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 359.3287048339844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 358.95440673828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 358.7894287109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 359.1286926269531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 359.44976806640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 358.77093505859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 359.1024475097656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 358.7763366699219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 358.96795654296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 359.02496337890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 359.01361083984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.7410888671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 358.9132385253906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 358.9002990722656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 359.1241455078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 358.89727783203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 358.95635986328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 359.0902099609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 359.5956726074219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 358.9971923828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 359.2691955566406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 358.67864990234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 358.8809509277344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 359.1497497558594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 358.9673767089844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 359.2674865722656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 359.8976135253906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 358.9864807128906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 359.30657958984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 359.0710144042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 359.2082824707031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 359.3751525878906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 359.15179443359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 358.8218688964844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 358.8544616699219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.9570007324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 359.04803466796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 358.77545166015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 358.8864440917969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 359.01531982421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 358.9071044921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 359.3409118652344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 358.66552734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 358.8734130859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 359.2386169433594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 359.1250915527344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 358.8675231933594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 359.42730712890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 358.9508056640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 359.43988037109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 359.36822509765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 358.8929138183594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 358.9823303222656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 359.1151123046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 359.4627990722656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 359.32916259765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 359.05438232421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 358.9580078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 358.9054260253906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 358.99969482421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 358.83367919921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 359.6304931640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 358.77056884765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 358.9777526855469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 359.03582763671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 359.3683776855469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 359.2379455566406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 359.1415100097656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 358.80810546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 358.8938293457031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 359.02667236328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 358.9375305175781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 359.3013916015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 359.3400573730469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 358.89483642578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 359.2644958496094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 359.4234313964844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 359.5819396972656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 358.78045654296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 359.19683837890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 358.8536376953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 359.10113525390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 358.8551940917969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.873291015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 358.85906982421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.99298095703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 359.03704833984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 359.19647216796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 359.03875732421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 358.9388732910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 359.48333740234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 358.9660949707031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 358.62017822265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 359.3183288574219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 358.90386962890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 358.8884582519531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 359.36676025390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 359.4599914550781
############ Running episode number: 503  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 359.5325012207031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 358.8946533203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 359.04791259765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 359.22393798828125
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 2 19.0 974.154538113 (11.336751742492702, 11)
loss 358.7025451660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 359.3321533203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 359.0998840332031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 358.99591064453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 358.9076232910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 358.9209899902344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 358.78515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 359.01123046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 358.9686584472656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.92333984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 359.0602722167969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 359.7242431640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 358.951416015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 358.8822937011719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 358.93902587890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 359.0220947265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 358.99078369140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 358.75103759765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 359.65240478515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 358.92340087890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 358.9331359863281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 359.0809020996094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 358.919189453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 358.8406982421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 358.78411865234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 358.99713134765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 359.3852233886719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 358.7613525390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 359.4067077636719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 359.37506103515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 358.8569030761719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 359.1111755371094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 359.0332336425781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 359.23162841796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 359.0626525878906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 359.31475830078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 358.7100524902344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 359.6914367675781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 359.15582275390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 359.1438293457031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 358.8154602050781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 359.707275390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 358.8960876464844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 359.4275817871094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 358.8917236328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 358.8832702636719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 359.8413391113281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 358.95166015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 359.072998046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 358.82879638671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 359.4404602050781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 359.39752197265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 359.0578308105469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 358.7879638671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 359.1306457519531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 359.05426025390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 359.3053894042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 358.83026123046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 358.8526916503906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 359.3034973144531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 359.5663757324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 358.8005065917969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 358.9829406738281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 359.42425537109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 359.0844421386719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 359.01910400390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 359.27630615234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 359.1923522949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 359.799072265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 359.1967468261719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 359.0612487792969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 359.61151123046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 359.3853454589844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 359.4914245605469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.9100341796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 359.0090637207031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 359.0389404296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 358.98992919921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 359.2059326171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 358.921630859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 359.3365478515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 359.0097351074219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 358.90771484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 359.02703857421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 359.2336120605469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 359.02850341796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 359.079345703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 358.8196716308594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 358.82025146484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 359.2638244628906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 359.2139587402344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 358.74017333984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 358.9172058105469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 359.4906005859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 358.9876403808594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 359.0154724121094
############ Running episode number: 504  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.9168395996094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 359.2254638671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 359.19879150390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 359.3291931152344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 358.9646911621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 358.99139404296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 358.8982238769531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 359.2841796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 358.97509765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 359.0602111816406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 358.8191223144531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 359.00006103515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 359.0218200683594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 359.2623291015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 359.2623596191406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 358.8615417480469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 358.84619140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 359.3883056640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 359.0702209472656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 358.79345703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 359.0217590332031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 358.83599853515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 358.7738037109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 358.95611572265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 359.4174499511719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 359.7660827636719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 358.95123291015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 359.4266052246094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 359.18341064453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 359.1537780761719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 358.79290771484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 358.9967956542969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 358.84283447265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 359.0370788574219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 359.1654968261719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 358.9615478515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 358.9220886230469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.95111083984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 358.8254089355469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 358.518798828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 358.91973876953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 359.0355224609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 359.7492980957031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 359.3662109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 359.40667724609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 359.1653747558594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 359.0174255371094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 358.9411315917969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 359.068115234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 358.9160461425781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 358.9454650878906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 359.1803894042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 358.8923034667969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 359.04840087890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 359.5239562988281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 359.35430908203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 358.9412841796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 359.046630859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 359.1046447753906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 358.9725646972656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 359.0385437011719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 358.9515380859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 358.763427734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 358.9119567871094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 358.9695129394531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 359.02227783203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 359.0185241699219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 358.9718322753906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 358.8651123046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 359.0330810546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 359.2212829589844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 358.7834167480469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 358.6798095703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 359.099853515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 359.3245849609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 359.0521545410156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 359.1562805175781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 358.91455078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.6421813964844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 358.9436340332031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 358.8497619628906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 358.87408447265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 358.9956970214844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 359.2972717285156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 359.0185852050781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 359.10052490234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 358.87176513671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.8659362792969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 359.0465393066406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 358.9769592285156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 358.6961669921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 359.6442565917969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 359.5801696777344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 359.1022033691406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 359.3148498535156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 358.92730712890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 359.0734558105469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 358.9635925292969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 358.8269348144531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 359.28204345703125
############ Running episode number: 505  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.9893798828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 359.2776184082031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 358.984619140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 359.6088562011719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 359.3006286621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 358.9830017089844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 359.51171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 358.84423828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 359.5906066894531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 359.05731201171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 359.95977783203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 359.3935241699219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 358.99200439453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.95184326171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 359.0052185058594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 358.8719177246094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 358.99847412109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 359.0149841308594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 358.86077880859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 358.96295166015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 359.2150573730469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 359.3707275390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 358.9886169433594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 359.461181640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 359.1928405761719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 359.2860107421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 358.9455871582031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 359.3659362792969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 358.8903503417969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 359.33837890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 358.94989013671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 358.934326171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 358.7886657714844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 359.4748840332031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 359.25006103515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 358.8692321777344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 358.98748779296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 359.52996826171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 359.2567443847656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 359.0621643066406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 358.8947448730469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 358.9067077636719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 358.7508239746094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 359.48016357421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 358.895263671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 358.7061462402344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 359.0235290527344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 359.415771484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 358.9958801269531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 359.448486328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 358.8585510253906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 359.4355163574219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 359.0174255371094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 358.9835510253906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 359.0215148925781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 359.3381042480469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 359.03729248046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 359.2129211425781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 358.8601989746094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 359.00347900390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 358.6164245605469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 359.2712707519531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 358.7149963378906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 359.14794921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 358.92523193359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 358.96832275390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 358.96942138671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 359.419921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 359.0507507324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 359.4132385253906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 359.1202697753906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 359.53009033203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 359.4064025878906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 359.0246887207031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 358.7835388183594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 359.1063537597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 359.30010986328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 358.8523864746094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.8665466308594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 358.63604736328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 358.9571533203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 359.49505615234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 359.1910095214844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 358.74810791015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 359.0586853027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.8421325683594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 358.9878845214844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.8899230957031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.7579040527344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 359.0980529785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 358.8905944824219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 358.90008544921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 359.1098327636719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 359.1557312011719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 358.9968566894531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 359.3321533203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 358.86016845703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 358.9364013671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 358.85107421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 359.01873779296875
############ Running episode number: 506  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.88189697265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 358.8683776855469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 358.92144775390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 358.86859130859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 359.0308837890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 358.7827453613281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 359.0560302734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 359.2693176269531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 359.2953186035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 359.4532165527344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 359.0440673828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 358.7568054199219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 358.90399169921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 359.42755126953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 359.0204772949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 358.7464294433594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 359.16802978515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 359.32049560546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 359.1405334472656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 358.9989929199219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 359.0335388183594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 358.8388366699219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 358.84552001953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 359.15582275390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 359.59979248046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 359.10601806640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 359.30010986328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 358.81329345703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 358.9152526855469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 359.4667053222656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 359.2138671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 358.89898681640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 359.03564453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 358.9097595214844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 358.94793701171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 359.1502685546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 358.8937072753906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 359.3527526855469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 359.4031677246094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 359.0107421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 359.0006408691406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 359.3631591796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 359.0322265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 359.0099792480469
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 4 19.0 937.284736847 (10.924797168745895, 11)
loss 358.94647216796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 358.8811950683594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 358.9091796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 358.7447814941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 358.8240661621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 358.8905334472656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 358.76123046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 358.7827453613281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 358.9671630859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 358.8181457519531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 359.2732849121094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 358.9004821777344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 359.02569580078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 358.9720153808594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 359.2525634765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 358.94744873046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 359.2668151855469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 359.3038024902344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 358.90472412109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 359.1394958496094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 358.9794616699219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 359.40411376953125
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 0 21.0 1387.23260634 (19.340464848017284, 9)
loss 360.3757629394531
Current State,action,reward,Response time,Next State:  (9, 19.340464848017284) 3 20.0 1435.4954296 (19.213467265587269, 10)
loss 358.9275207519531
Current State,action,reward,Response time,Next State:  (10, 19.213467265587269) 3 19.0 1396.82133527 (19.140765783401285, 11)
loss 358.8772277832031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 359.5455017089844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 359.0798645019531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 358.9273376464844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 359.5653076171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 358.75
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 359.0438232421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 358.9557189941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 359.04461669921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 358.890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.9412536621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 359.488525390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 360.3185119628906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 359.3684387207031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 359.0697326660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 358.6629943847656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 359.57733154296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.9727783203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 359.90045166015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.9529724121094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.9176330566406
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 1 20.0 1213.81868812 (16.017694914042416, 10)
loss 359.2681579589844
Current State,action,reward,Response time,Next State:  (10, 16.017694914042416) 3 19.0 1227.30449265 (15.947547279389703, 11)
loss 359.34033203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 359.5279235839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 359.2663269042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 359.9112548828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 359.67169189453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 359.9466857910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 359.3004150390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 359.0372619628906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 359.111083984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 359.0032653808594
############ Running episode number: 507  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 359.49652099609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 358.8668518066406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 359.085693359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 359.2807312011719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 359.37835693359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 358.99322509765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 359.14849853515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 359.3028869628906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 358.9459228515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 358.84521484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 359.30255126953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 359.4878234863281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 359.2958068847656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.9975891113281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 358.92333984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 358.8198547363281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 359.5626220703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 359.2152099609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 358.9532470703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 358.89190673828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 358.9044189453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 359.4404296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 359.8331604003906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 359.3971862792969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 359.30645751953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 358.64599609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 359.2002868652344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 359.055419921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 358.9209289550781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 358.9308166503906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 359.34722900390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 359.0064697265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 358.91357421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 359.1216125488281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 359.08203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 359.0516052246094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 359.0391540527344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.9281005859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 358.9457702636719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 359.3912658691406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 359.03955078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 358.8842468261719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 358.9212646484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 358.99798583984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 358.9656677246094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 360.2597961425781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 358.8099670410156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 360.2158203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 359.96533203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 359.0812683105469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 358.74627685546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 358.8282470703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 358.8036193847656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 360.230224609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 359.3082580566406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 359.0184326171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 359.05902099609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 359.03851318359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 359.7287292480469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 359.3692932128906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 359.2989807128906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 359.3331604003906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 358.9461669921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 359.00115966796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 359.0420227050781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 359.2689208984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 358.6004943847656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 359.4703674316406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 359.3414306640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 358.82733154296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 359.0229187011719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 359.39739990234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 359.1546936035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 359.52923583984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 359.98779296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 359.1521911621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 359.20098876953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 359.3915710449219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 359.37298583984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 359.2702941894531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 359.0279846191406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 359.4429626464844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 359.301025390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 358.7115173339844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 358.8960876464844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 359.3012390136719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 358.98956298828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 359.2337646484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 360.0745849609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 359.0291442871094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 359.1045227050781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 359.41259765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 359.0049743652344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 358.9052429199219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 358.95501708984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 359.0088806152344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 358.9308776855469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 359.3487243652344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 358.7878112792969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 359.0420227050781
############ Running episode number: 508  ##############
Action +2 not possible so Scaled up by 1
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 4 19.0 1029.06659875 (11.973514343585284, 11)
loss 360.43682861328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 358.82659912109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 359.2210388183594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 359.1562194824219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 359.14788818359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 359.3758239746094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 358.9353332519531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 359.8135986328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 358.69659423828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 359.02325439453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 358.8079528808594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 358.9779357910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 359.06512451171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.7550048828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 359.29736328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 359.45068359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 359.02337646484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 359.55084228515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 359.044921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 359.76336669921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 358.8938903808594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 358.75262451171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 358.898681640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 359.08123779296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 359.2290954589844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 359.4221496582031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 359.24945068359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 359.3772277832031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 359.0088806152344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 359.41558837890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 358.56591796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 359.7372741699219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 358.88909912109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 358.9522399902344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 359.43707275390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 359.0273132324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 359.6723937988281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 359.03271484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 358.9957580566406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 359.2489013671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 358.94854736328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 358.82244873046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 358.9077453613281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 358.9851379394531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 358.881103515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 359.0754699707031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 358.896240234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 359.2935791015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 359.3604736328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 358.8563232421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 359.5745544433594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 359.16265869140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 359.23040771484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 359.91314697265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 359.2100830078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 358.9327697753906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 360.311279296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 359.9317932128906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 358.9331970214844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 359.5483703613281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 359.5545959472656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 359.005126953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 359.15789794921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 358.8899841308594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 359.23956298828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 358.98248291015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 359.0564270019531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 359.2587890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 359.0387268066406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 358.65966796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 359.40570068359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 359.319580078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 359.1614990234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 359.6148986816406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 358.8680114746094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 358.86224365234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 359.5052490234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 359.0950927734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 359.01837158203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 359.0240173339844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 359.41326904296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 358.6687927246094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 358.927978515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 359.0086975097656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 359.1601867675781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 359.4736022949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 359.6317138671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 359.0788879394531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.9150390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 358.9651794433594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 358.91748046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 358.9952697753906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 359.02862548828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 359.93597412109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 359.31280517578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 358.70465087890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 359.3921813964844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 359.02386474609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 358.8396301269531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 358.7991943359375
############ Running episode number: 509  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.9412841796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 359.6932373046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 359.35382080078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 358.8963928222656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 358.9613342285156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 359.3852844238281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 358.72833251953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 359.0470886230469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 359.4354553222656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 359.0038146972656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 359.4999694824219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 359.2379150390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 359.2428894042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.7460021972656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 360.0850524902344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 359.14093017578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 359.078369140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 359.256591796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 359.2091064453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 359.3337097167969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 358.7403259277344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 359.9855041503906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 359.19256591796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 360.20294189453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 359.14678955078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 359.1690673828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 359.0259094238281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 358.95556640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 359.70513916015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 358.8937072753906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 358.8612060546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 359.064453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 359.38250732421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 358.9543151855469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 359.253662109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 358.8354187011719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 358.92120361328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 359.6678466796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 358.9028015136719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 358.8795166015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 359.01824951171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 358.6609191894531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 359.8956604003906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 359.3118896484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 358.8955383300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 359.3186340332031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 358.91925048828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 358.7061462402344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 359.24945068359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 359.3974914550781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 358.791015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 359.23748779296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 359.309326171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 359.58746337890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 358.8541564941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 359.242919921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 359.01409912109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 358.8751220703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 359.9879455566406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 359.0507507324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 359.3829650878906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 359.1398010253906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 358.9922180175781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 359.0857238769531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 358.70367431640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 358.9427185058594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 358.8604431152344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 358.69879150390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 358.91668701171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 358.76739501953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 358.79449462890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 358.977783203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 359.0312805175781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 359.7954406738281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 359.42041015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 359.0205383300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 358.6730651855469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 358.937255859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 359.1278076171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 358.8560791015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 358.982666015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 359.172119140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 359.68255615234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 358.7254943847656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 359.40789794921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.8707275390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 360.0021667480469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 359.0278625488281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.80804443359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 359.7995300292969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 359.27911376953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 358.88763427734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 358.7606201171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 358.7920837402344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 358.73431396484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 358.81781005859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 359.18463134765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 359.85443115234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 359.1245422363281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 359.41046142578125
############ Running episode number: 510  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 359.1183776855469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 358.9659729003906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 358.877685546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 359.0816955566406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 358.83062744140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 359.89459228515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 359.465576171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 358.9560546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 359.54827880859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 358.9382019042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 359.18731689453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 359.4209899902344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 358.84027099609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 359.3280029296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 359.0003967285156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 359.1895751953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 359.7279357910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 358.857421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 359.1061096191406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 359.230712890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 359.2348937988281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 359.3169860839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 358.8789367675781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 359.1540832519531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 358.9342346191406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 358.96453857421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 358.7768249511719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 359.44744873046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 359.0611877441406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 358.72186279296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 358.94671630859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 359.03155517578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 360.2055969238281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 359.5682373046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 358.92022705078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 359.1603698730469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 359.0261535644531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.96917724609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 358.86480712890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 358.9953918457031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 359.0603332519531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 359.8009948730469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 358.962646484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 358.8831787109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 359.1715393066406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 358.79559326171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 358.9315185546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 358.8333740234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 359.22259521484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 359.2095031738281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 358.8411865234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 359.6841735839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 358.98553466796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 358.9965515136719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 358.9494934082031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 358.97723388671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 358.8777770996094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 359.0182189941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 359.9448547363281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 359.7307434082031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 358.8694763183594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 359.6018981933594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 358.9625244140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 360.1316833496094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 359.25439453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 359.6246337890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 358.6180725097656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 359.7481384277344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 359.5154724121094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 358.6637268066406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 359.3635559082031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 359.68359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 359.0126037597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 359.04071044921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 359.2193908691406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 358.8824462890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 359.0419921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 358.79241943359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.8275451660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 358.8045349121094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 358.9521484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 359.1142883300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 358.8570251464844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 359.5691223144531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 359.7911376953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.7933044433594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 358.8525085449219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 359.0780334472656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 359.07427978515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 359.4588623046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 359.6419982910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 359.3871154785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 359.30572509765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 359.2330627441406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 359.08587646484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 358.74127197265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 358.86285400390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 359.7621154785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 359.08636474609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 358.7249755859375
############ Running episode number: 511  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 359.29144287109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 358.8978271484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 359.4389953613281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 359.3394470214844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 358.9954528808594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 358.942626953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 359.04193115234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 359.0592041015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 359.30364990234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 358.94464111328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 358.59246826171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 359.3616638183594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 359.0450744628906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.8626403808594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 359.8095703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 359.27618408203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 358.8012390136719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 358.97369384765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 358.7901306152344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 360.09893798828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 358.8594970703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 359.2889099121094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 358.99176025390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 358.6314392089844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 359.4691162109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 358.6268615722656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 358.9731140136719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 358.9176940917969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 359.4085693359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 359.383544921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 359.39385986328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 359.3068542480469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 359.9437255859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 358.99127197265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 360.059814453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 358.8841247558594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 359.080078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.6420593261719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 358.86639404296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 358.9042053222656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 358.8443908691406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 358.98358154296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 359.3016052246094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 358.9213562011719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 359.06646728515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 359.86871337890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 358.9274597167969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 358.9068298339844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 358.8870544433594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 359.65966796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 359.4126892089844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 358.8546447753906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 358.84332275390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 359.3243103027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 359.3119201660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 359.2387390136719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 358.7876281738281
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 2 19.0 1204.9600972 (16.466876895473597, 11)
loss 359.1653747558594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 359.091064453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 359.1090087890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 359.0562744140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 358.96826171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 358.93212890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 359.0294189453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 358.96258544921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 359.06414794921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 359.24346923828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 358.8717346191406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 358.8324890136719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 359.7739562988281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 359.0562744140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 359.7814636230469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 359.859130859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 359.025146484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 359.0012512207031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 359.4267883300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 358.8200378417969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 359.6873474121094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.864013671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 359.7602233886719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 358.9013977050781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 358.9571838378906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 358.9656982421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 359.1753234863281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 359.1237487792969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.86334228515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 358.86041259765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 359.0278625488281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.9365539550781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 358.82415771484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 358.9044189453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 359.73980712890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 359.0010681152344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 359.0625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 359.788330078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 358.9698181152344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 358.9326171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 358.9800720214844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 358.9684143066406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 358.9482116699219
############ Running episode number: 512  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.9296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 359.1751708984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 358.8228454589844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 358.98785400390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 358.9588623046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 358.8048095703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 359.72161865234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 358.84979248046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 358.94476318359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 358.97998046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 358.8451843261719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 359.0562438964844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 358.98028564453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.8820495605469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 359.0329284667969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 359.1031494140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 358.9471435546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 358.7278137207031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 359.0250549316406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 359.3441162109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 359.78216552734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 358.8212890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 359.0023498535156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 359.22039794921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 359.10455322265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 359.10577392578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 359.22735595703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 358.8636779785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 358.9541320800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 359.08038330078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 359.0625305175781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 358.9378356933594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 359.5450134277344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 358.77935791015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 359.6769714355469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 359.10137939453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 358.88330078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 359.0853576660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 358.82769775390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 359.0928955078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 358.9913024902344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 359.0348205566406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 359.1043701171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 359.64422607421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 359.0591735839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 359.00177001953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 358.85980224609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 359.00799560546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 359.7687072753906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 358.8366394042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 359.07513427734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 358.909912109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 359.0893249511719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 359.3349609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 359.45574951171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 358.96319580078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 358.764892578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 358.7562255859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 358.7846374511719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 358.5848693847656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 358.93365478515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 359.03204345703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 359.2471618652344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 358.84942626953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 358.8307800292969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 358.98187255859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 359.8466491699219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 358.8520202636719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 358.952392578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 359.380859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 360.2086486816406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 359.1778564453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 358.93524169921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 359.1240539550781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 358.88665771484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 359.08575439453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 358.88446044921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 359.1783752441406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 359.0884094238281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 359.55157470703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 358.9588623046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 358.9549865722656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 358.9892578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 359.0942077636719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 358.9783630371094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 359.6100158691406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 359.0172119140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.80816650390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.9827880859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 358.967041015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 359.015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 359.3818664550781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 359.0124206542969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 358.7770080566406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 359.0177307128906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 359.66241455078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 359.8712158203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 358.7942810058594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 358.9119567871094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 359.12957763671875
############ Running episode number: 513  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.927490234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 359.01959228515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 358.9974670410156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 359.00616455078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 358.9098205566406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 358.8833923339844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 358.9940490722656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 360.0201110839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 358.91485595703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 359.29888916015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 358.7109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 360.0260314941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 358.9023742675781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.9349365234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 358.95721435546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 358.7973327636719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 358.9713439941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 358.954345703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 359.5167236328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 358.9875793457031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 358.9031066894531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 358.9535217285156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 359.07708740234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 359.3249206542969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 358.90606689453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 359.02178955078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 358.9244689941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 359.0221252441406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 358.9748229980469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 358.7943115234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 360.2745361328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 358.9202880859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 358.89013671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 359.7127990722656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 359.1813049316406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 358.7348327636719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 358.7289733886719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 359.7598571777344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 358.95538330078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 359.040283203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 359.1796569824219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 359.6335754394531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 358.82623291015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 358.9853210449219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 359.21502685546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 358.859619140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 359.3000793457031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 359.2731018066406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 358.96356201171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 359.76031494140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 358.9790344238281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 359.8474426269531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 358.857421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 358.9928283691406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 358.9687805175781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 359.2839660644531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 359.815673828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 359.2434997558594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 358.8149719238281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 359.8199768066406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 359.22637939453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 359.251708984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 359.25360107421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 359.04248046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 359.03240966796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 359.0569152832031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 359.6720886230469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 358.8501281738281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 359.74951171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 359.0838623046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 358.83270263671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 359.05377197265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 358.8866271972656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 358.869384765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 358.85455322265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 359.8408508300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 358.9066467285156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 358.99169921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.9856872558594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 358.9082336425781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 358.9954528808594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 359.0007019042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 358.82696533203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 358.9517822265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 358.9272766113281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.8467712402344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 358.9170837402344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 359.08984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.9093933105469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 358.968017578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 359.3097839355469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 359.9121398925781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 358.8406677246094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 358.9826965332031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 359.1889343261719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 359.1890563964844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 359.367431640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 358.9837341308594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 359.0795593261719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 359.029296875
############ Running episode number: 514  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 359.2044982910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 358.8592224121094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 359.2806091308594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 358.7068786621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 358.9419860839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 358.9495849609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 359.75360107421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 358.75494384765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 358.85968017578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 359.0419006347656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 359.1990966796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 358.8989562988281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 358.89361572265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.9139404296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 359.2846374511719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 359.47381591796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 359.05389404296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 359.18560791015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 358.922119140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 358.96417236328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 359.464111328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 359.20941162109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 358.8808898925781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 359.0391845703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 359.34246826171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 359.0826110839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 359.0677185058594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 358.8897399902344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 358.7129821777344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 358.8612976074219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 358.95086669921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 359.1648864746094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 359.0905456542969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 359.70068359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 359.3929138183594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 359.9127197265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 359.0752258300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 359.0523986816406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 359.00439453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 358.9367980957031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 358.8978576660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 358.9268493652344
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 2 19.0 931.154858096 (10.624473674922116, 11)
loss 358.5927429199219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 358.7890930175781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 359.00665283203125
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 2 19.0 945.391786838 (11.039747673816453, 11)
loss 358.9505920410156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 358.82708740234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 359.2148132324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 358.8128662109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 359.0507507324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 359.0863342285156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 358.8267822265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 358.90240478515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 358.8716125488281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 358.7516174316406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 359.07135009765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 359.01287841796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 358.8801574707031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 358.83050537109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 358.87841796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 358.777099609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 358.91253662109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 358.8814697265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 359.05572509765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 359.036376953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 359.0313415527344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 359.75677490234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 358.963623046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 359.775390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 358.78570556640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 359.0030822753906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 359.14495849609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 359.3884582519531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 358.7404479980469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 359.3911437988281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 359.41802978515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 359.2338562011719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 358.8289794921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 359.09295654296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 358.88470458984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 358.80279541015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 358.932861328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 359.1916809082031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 359.3290100097656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 359.1753845214844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 359.6342468261719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 358.84271240234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.8504333496094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 359.053466796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 359.160400390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 358.7049560546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 359.2303161621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 359.0549011230469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 358.83642578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 358.98248291015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 358.93017578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 358.9493408203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 360.3600769042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 358.96405029296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 358.8512268066406
############ Running episode number: 515  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 359.0466613769531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 359.03216552734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 358.96356201171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 359.0096435546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 359.5218811035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 359.03387451171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 359.0257568359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 359.24615478515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 358.96124267578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 359.03021240234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 358.9227600097656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 358.8265075683594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 358.85009765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 359.0097351074219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 358.9191589355469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 358.88018798828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 359.11920166015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 359.3761901855469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 358.8958435058594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 359.1563415527344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 359.6309509277344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 358.8579406738281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 359.0876159667969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 359.6631164550781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 359.1434631347656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 359.21649169921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 358.9516906738281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 359.38922119140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 358.8692321777344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 359.170166015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 359.455078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 358.9908142089844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 359.3359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 358.7281799316406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 358.5660095214844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 359.1224060058594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 358.7319641113281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.92095947265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 358.9654235839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 359.0146789550781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 358.7848815917969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 358.9457092285156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 359.06451416015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 359.0599670410156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 358.97467041015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 359.8778076171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 358.9665832519531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 359.3123779296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 359.12860107421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 358.95904541015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 358.9405212402344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 358.8028869628906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 358.9326477050781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 358.9266662597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 359.03546142578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 359.24859619140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 358.9383850097656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 358.5849304199219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 358.97802734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 358.99725341796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 358.9732666015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 358.93609619140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 358.8715515136719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 358.8211975097656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 358.98663330078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 359.5321350097656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 359.0974426269531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 359.06231689453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 359.85693359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 358.9330139160156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 360.1269836425781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 358.885009765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 359.91241455078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 358.7862548828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 358.91876220703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 358.9916076660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 359.1166076660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 359.004638671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 359.00439453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 358.9123229980469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 359.0489196777344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 358.97802734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 358.86627197265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 359.2908630371094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 359.04010009765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 359.6511535644531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 359.0968933105469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.991943359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 359.323974609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 358.9129333496094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 358.9872131347656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 358.9743957519531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 358.9270324707031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 359.3699951171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 359.06195068359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 359.0908508300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 358.7742919921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 359.8066711425781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 359.7352600097656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 359.2600402832031
############ Running episode number: 516  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.8804016113281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 359.13232421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 359.02716064453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 359.93096923828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 358.7939453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 359.11419677734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 358.8766174316406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 358.82342529296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 358.83929443359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 359.0650939941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 358.9762878417969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 359.0495300292969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 360.23193359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 359.0940246582031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 359.1035461425781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 359.37615966796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 358.7306823730469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 358.7958679199219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 358.9927978515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 359.25006103515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 359.1041259765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 358.7262268066406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 359.2356262207031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 359.7969665527344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 359.19207763671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 359.0316162109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 359.3937683105469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 359.0233154296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 358.8368225097656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 358.830078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 358.9576110839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 359.2960205078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 358.9234313964844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 359.7666931152344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 359.3143615722656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 358.6526794433594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 359.13037109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 359.0946960449219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 359.4076232910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 359.0534973144531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 359.0121154785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 358.9580993652344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 359.2298583984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 359.1231384277344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 358.9733581542969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 358.79595947265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 359.37506103515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 358.8587341308594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 358.74176025390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 358.7721252441406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 359.0901184082031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 359.0433044433594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 359.8159484863281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 358.9244689941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 359.1733703613281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 359.6810302734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 359.12921142578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 360.19268798828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 359.032958984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 359.1888732910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 359.0625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 359.0336608886719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 358.9004821777344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 359.7776794433594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 358.93603515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 358.8127136230469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 359.027099609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 359.065673828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 359.0380859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 359.03143310546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 359.3279113769531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 359.4645080566406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 358.80999755859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 358.9680480957031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 359.1252136230469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 358.8119201660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 358.6888427734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 359.0145263671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 359.03118896484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 358.8907165527344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 358.9211730957031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 358.94305419921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 358.9443054199219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 358.9718933105469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 358.81951904296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 359.0461730957031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 359.1175231933594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.8634033203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.9050598144531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 359.2782287597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 358.93389892578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 358.8854064941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 359.06170654296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 358.9788818359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 358.978759765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 359.1448974609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 358.757080078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 358.917724609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 358.9703674316406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 358.9158630371094
############ Running episode number: 517  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 359.1942443847656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 358.8441467285156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 358.7694396972656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 359.26800537109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 358.9701232910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 359.0206298828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 358.6930847167969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 358.81756591796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 358.87103271484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 358.7784729003906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 358.8614501953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 358.94036865234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 359.0598449707031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.9427185058594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 359.052734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 358.9174499511719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 359.02685546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 358.7940673828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 359.0942077636719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 359.08843994140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 358.92303466796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 358.9767761230469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 359.1488037109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 358.9583435058594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 359.0320739746094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 358.92059326171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 358.9966125488281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 358.9667663574219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 359.1983642578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 358.9658508300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 358.81256103515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 359.2823486328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 359.00848388671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 358.923095703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 358.97735595703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 358.9239196777344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 358.93182373046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 359.21575927734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 359.088623046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 358.9661560058594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 359.109619140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 359.25048828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 358.9844665527344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 359.018798828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 358.98834228515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 358.89483642578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 358.79345703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 358.9888000488281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 359.14007568359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 358.99884033203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 358.8387145996094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 359.18231201171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 358.8813171386719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 359.1619873046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 359.1121826171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 358.7826232910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 359.0805358886719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 358.9777526855469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 358.9623107910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 358.8802185058594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 358.816650390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 358.8068542480469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 358.8861083984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 358.8406982421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 358.9640808105469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 358.9236755371094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 358.91741943359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 359.14471435546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 359.1802673339844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 359.11669921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 358.9976806640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 359.111328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 358.9166259765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 358.98736572265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 358.9715270996094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 359.0765380859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 358.7701416015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 359.1365966796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.9060974121094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 359.0755615234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 358.802978515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 359.0540466308594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 359.145263671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 358.8450012207031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 358.9766540527344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 359.0906982421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 358.9902648925781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 359.05621337890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 359.0963134765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 358.9938659667969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 358.87701416015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 358.8053894042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 359.0164794921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 358.9328308105469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 358.8793640136719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 358.957763671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 358.8943786621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 358.94549560546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 359.05352783203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 358.75225830078125
############ Running episode number: 518  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.9501953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 359.0895690917969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 358.7508850097656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 358.941162109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 358.9991149902344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 359.05975341796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 358.919921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 359.0417785644531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 358.9638366699219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 358.9175720214844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 358.8941345214844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 359.1285705566406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 359.03521728515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.9731750488281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 359.02459716796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 358.9493103027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 358.9770812988281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 358.88604736328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 358.9097900390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 358.884033203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 358.7355041503906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 358.757568359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 359.1285400390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 358.97674560546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 358.9267578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 359.0588684082031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 359.0583801269531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 359.0277404785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 358.6405334472656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 358.7451477050781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 358.8904724121094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 358.9850769042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 358.9468078613281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 358.9147033691406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 359.1732482910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 358.9073181152344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 358.9049377441406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.96783447265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 359.1015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 358.9539794921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 358.99774169921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 359.1510009765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 359.0484924316406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 358.7438659667969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 359.02325439453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 359.1253967285156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 358.8630065917969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 358.78253173828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 358.7610168457031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 358.96868896484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 358.9596862792969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 359.0549011230469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 359.080078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 358.8853759765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 358.722412109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 358.8843994140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 359.035400390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 358.753173828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 358.9272155761719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 358.9125061035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 358.9961242675781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 358.8995666503906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 358.9962463378906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 359.1016540527344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 358.9601135253906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 358.9339294433594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 359.0373840332031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 358.68756103515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 358.7792053222656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 358.93304443359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 358.8096618652344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 358.7386474609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 359.056640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 358.9886169433594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 358.8797912597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 359.008056640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 359.1170959472656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 359.0483093261719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 359.0498962402344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 359.1900939941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 358.97821044921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 358.88623046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 358.946044921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 358.970703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 359.00811767578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.9781799316406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 358.9998779296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 359.13519287109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 359.0769958496094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 359.0519104003906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 358.94183349609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 358.8269348144531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 358.83111572265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 358.8089904785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 358.9114074707031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 359.02264404296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 358.96417236328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 359.0196533203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 359.02081298828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 358.845458984375
############ Running episode number: 519  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.90008544921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 358.87493896484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 358.8508605957031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 359.1789245605469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 358.8510437011719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 358.9319152832031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 359.0201721191406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 358.69122314453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 359.0714416503906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 359.0049743652344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 359.08380126953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 358.8176574707031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 358.99578857421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.9068298339844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 359.1795654296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 358.65081787109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 358.8523864746094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 358.92950439453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 358.63653564453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 358.88934326171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 358.8963317871094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 358.787353515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 359.0539855957031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 358.92120361328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 359.1759338378906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 358.737060546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 359.0021667480469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 359.02691650390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 358.958251953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 359.1695861816406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 358.9276123046875
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 1 20.0 912.67468196 (10.24826025489064, 10)
loss 358.9912414550781
Current State,action,reward,Response time,Next State:  (10, 10.24826025489064) 3 19.0 921.2700698 (10.276491935146446, 11)
loss 359.11907958984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 359.1161804199219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 359.125732421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 358.8722229003906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 358.98284912109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.9236755371094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 358.9538269042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 358.9499816894531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 359.2324523925781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 358.8685607910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 358.85980224609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 359.0577697753906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 358.9466552734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 358.9453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 359.0980529785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 358.8121032714844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 359.1666564941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 358.9547424316406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 359.3941650390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 359.2298583984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 359.1026611328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 358.8396301269531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 359.0464172363281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 358.9248962402344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 358.712890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 358.91912841796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 358.8789978027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 358.9568786621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 358.8935546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 359.12432861328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 359.1427001953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 359.371826171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 358.963623046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 359.1119384765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 358.90020751953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 358.9761657714844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 358.9697265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 358.7501525878906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 358.9150695800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 358.94293212890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 358.9548034667969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 358.9759216308594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 358.9680480957031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 358.7733154296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 359.117919921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 359.34344482421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.9493103027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 359.11468505859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 359.0464782714844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 358.9772644042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 359.3683166503906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 358.8760070800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 358.9551696777344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 359.31512451171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 359.1159973144531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.9168701171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 359.0274658203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 359.0135803222656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 359.041015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 359.03167724609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 358.84185791015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 359.0536193847656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 358.92694091796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 358.4960632324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 358.7568664550781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 358.7350769042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 358.8214416503906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 359.0125427246094
############ Running episode number: 520  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 359.2651062011719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 358.65057373046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 359.12506103515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 359.0524597167969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 359.1583557128906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 359.00616455078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 358.7261962890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 358.7884521484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 359.4939880371094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 358.7524719238281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 358.94085693359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 358.996826171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 358.66015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 359.1767272949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 358.630126953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 358.68865966796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 359.0806884765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 358.8400573730469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 359.247314453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 358.9496765136719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 359.1611633300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 358.9396667480469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 359.47320556640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 358.99822998046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 358.9811706542969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 358.914794921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 358.9871826171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 358.8183898925781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 359.30389404296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 358.8790283203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 358.9822082519531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 359.0749816894531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 359.03759765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 358.9680480957031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 359.0389099121094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 358.8887939453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 358.9250793457031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.9548034667969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 359.2497863769531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 358.8431091308594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 358.75238037109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 359.1043395996094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 359.28155517578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 358.8276062011719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 358.9012145996094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 358.98822021484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 358.77691650390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 359.2321472167969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 358.9250793457031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 358.9366149902344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 358.9320373535156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 358.7568664550781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 358.7842102050781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 358.8638610839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 359.05230712890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 358.849365234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 359.092529296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 358.8160095214844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 359.00311279296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 359.0356750488281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 359.0264892578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 358.8612976074219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 358.8186340332031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 358.92901611328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 359.3416748046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 358.66424560546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 358.83282470703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 358.9336242675781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 359.0004577636719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 359.01287841796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 359.01788330078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 359.4245300292969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 359.0093688964844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 359.2254638671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 359.26251220703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 358.9834899902344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 359.00018310546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 359.13885498046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.8552551269531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 359.0063781738281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 358.9188232421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 359.0987243652344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 359.08154296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 358.9033508300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 358.92022705078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.7809753417969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 358.7615051269531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.9085998535156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 359.123779296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 359.06842041015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 358.92291259765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 358.9253845214844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 358.9264831542969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 358.9613952636719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 358.8110656738281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 359.1610107421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 359.0494689941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 358.9101867675781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 359.0173034667969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 359.23065185546875
############ Running episode number: 521  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.63568115234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 358.8479309082031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 358.8621520996094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 359.06756591796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 358.7664794921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 359.08953857421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 358.9057922363281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 358.75982666015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 358.8853759765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 358.79119873046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 358.9575500488281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 359.4959716796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 359.0951843261719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.9751281738281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 358.8494567871094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 358.6334533691406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 358.8614807128906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 359.176025390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 358.8136901855469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 358.9408264160156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 359.4085693359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 359.1649475097656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 359.0523986816406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 358.8235168457031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 358.94439697265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 358.956787109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 359.3345642089844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 358.7645263671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 358.9085998535156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 359.0109558105469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 359.01312255859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 359.47637939453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 358.6815490722656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 358.9612121582031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 359.0264892578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 358.8734130859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 358.8586730957031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.7396545410156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 358.937255859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 358.8753967285156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 358.7300720214844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 358.8734130859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 359.04559326171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 358.8343505859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 358.96270751953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 358.9493408203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 358.91583251953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 358.96881103515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 359.02423095703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 358.9559631347656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 359.0513916015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 359.13623046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 359.121337890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 359.1370849609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 358.8185729980469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 358.6885986328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 359.1773376464844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 358.71942138671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 358.98370361328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 359.47235107421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 359.2337341308594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 359.1860046386719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 358.82611083984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 358.86712646484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 358.8700866699219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 359.2040100097656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 359.3581848144531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 358.8284912109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 358.7135314941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 359.071533203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 359.1178283691406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 358.9579772949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 359.02093505859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 358.96142578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 358.9071044921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 358.8953552246094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 358.91668701171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 359.52398681640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 359.28094482421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 359.36688232421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 359.0970764160156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 358.81732177734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 358.65740966796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 359.10626220703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 359.1024169921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.8280944824219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 358.76983642578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.9068603515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 359.327392578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 359.3036193847656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 358.84857177734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 359.48748779296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 358.907470703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 358.9720764160156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 358.9278564453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 359.0234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 359.44598388671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 358.9314880371094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 359.11376953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 358.9654541015625
############ Running episode number: 522  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 359.0591735839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 359.2466125488281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 359.07159423828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 359.0294494628906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 358.8687438964844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 358.9587707519531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 358.898681640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 359.062255859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 359.13897705078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 359.09063720703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 359.03179931640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 359.3175964355469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 358.9831237792969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 359.197021484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 359.0427551269531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 359.1107482910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 359.1083679199219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 359.1007995605469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 358.7857666015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 358.8100891113281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 358.9252014160156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 358.9672546386719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 359.1101379394531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 359.027587890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 358.8709716796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 359.0504150390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 359.15032958984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 358.9987487792969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 358.7686767578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 359.0028381347656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 358.8977355957031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 359.1838684082031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 358.91424560546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 359.4317321777344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 358.7134704589844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 359.0978698730469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 358.7770690917969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.85784912109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 358.8174743652344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 359.210205078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 358.8672180175781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 358.9820861816406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 358.84637451171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 358.9034423828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 359.2327575683594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 359.0097961425781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 359.27532958984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 358.7221374511719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 358.8535461425781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 358.8102722167969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 358.92633056640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 359.4513854980469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 359.00823974609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 358.65423583984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 359.0901794433594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 359.0535888671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 359.0967712402344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 358.8363342285156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 358.9440612792969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 359.3424987792969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 358.82659912109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 358.9786376953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 359.1963806152344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 359.2471008300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 358.7994689941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 359.0827331542969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 358.9713134765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 358.95184326171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 359.5453186035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 358.92596435546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 359.1893615722656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 359.0970153808594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 358.7626647949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 359.4657897949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 359.0350646972656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 359.06671142578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 358.88232421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 359.3365783691406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 359.0752868652344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 359.358154296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 358.8567810058594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 359.1441955566406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 358.95587158203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 358.9883728027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 358.97088623046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 359.1256408691406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 358.90240478515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.8156433105469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.90496826171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 358.7696838378906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 358.56878662109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 359.14703369140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 358.8134460449219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 359.2679138183594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 358.7344970703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 358.970458984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 358.95867919921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 359.0732421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 358.700927734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 358.8660583496094
############ Running episode number: 523  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.6344299316406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 358.9104309082031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 359.0341796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 358.79461669921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 358.99896240234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 358.8489685058594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 359.16119384765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 358.9643249511719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 358.987060546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 358.9631652832031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 359.04486083984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 358.90753173828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 358.9268493652344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 359.0298767089844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 359.0005798339844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 359.0967102050781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 359.1819763183594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 359.19110107421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 359.3480529785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 359.1101379394531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 359.0404968261719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 359.0010681152344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 358.94952392578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 358.8645324707031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 358.90057373046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 358.8824462890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 358.9671630859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 358.6209411621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 358.74566650390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 358.9275817871094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 359.5255126953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 358.8034973144531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 359.2744140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 359.1943664550781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 359.0160217285156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 359.1650390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 358.8294372558594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.94622802734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 358.728271484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 358.8735656738281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 358.88653564453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 359.0258483886719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 359.1339416503906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 358.98248291015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 359.0561828613281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 359.0888366699219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 359.0723876953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 358.7472229003906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 358.9848327636719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 359.08172607421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 359.1758728027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 359.19732666015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 358.9571228027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 358.9625244140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 358.98455810546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 358.99072265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 359.1090393066406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 359.00799560546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 358.9848937988281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 358.7901306152344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 358.8039855957031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 359.6103820800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 358.8758239746094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 359.4078674316406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 358.88885498046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 358.7317810058594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 358.9975891113281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 358.9048767089844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 359.08502197265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 358.9821472167969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 358.90740966796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 358.9742126464844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 358.8697814941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 359.0101623535156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 359.0948791503906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 358.8650207519531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 358.984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 358.8934631347656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 359.1684265136719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 358.7957763671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 358.987548828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 358.972900390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 359.0006408691406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 359.0590515136719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 358.9427795410156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.8466491699219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 358.83038330078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.8921203613281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 359.0865173339844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 359.01995849609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 359.12908935546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 359.34112548828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 359.0096435546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 359.4616394042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 359.1451416015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 359.16778564453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 359.03509521484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 358.8543395996094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 359.1278991699219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 358.9440612792969
############ Running episode number: 524  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.45196533203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 358.8579406738281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 358.84564208984375
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 4 19.0 982.049698353 (11.469111876584304, 11)
loss 359.30029296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 359.0635986328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 359.174560546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 358.88739013671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 358.8158874511719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 359.01531982421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 359.0377197265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 358.86627197265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 358.9843444824219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 358.9292297363281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.9461364746094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 358.8743896484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 359.0348815917969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 358.89276123046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 358.8642578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 359.1448059082031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 358.8934326171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 359.0974426269531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 358.8905944824219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 358.6411437988281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 358.8409729003906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 359.0392761230469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 358.7232971191406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 358.7972106933594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 358.9476623535156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 358.7687683105469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 358.9006042480469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 359.01904296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 358.876220703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 358.950927734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 358.9049377441406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 358.74761962890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 358.907958984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 358.6874694824219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.9501037597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 358.7097473144531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 358.9179382324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 359.0358581542969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 358.9016418457031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 358.8475341796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 358.9960021972656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 358.9418029785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 359.42205810546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 359.1830749511719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 359.00848388671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 358.78546142578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 358.97479248046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 358.7558898925781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 359.0029602050781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 358.7361145019531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 359.00054931640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 358.8874816894531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 359.14129638671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 359.2259521484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 358.75299072265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 359.1593017578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 359.072509765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 358.9080810546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 358.928466796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 359.0841369628906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 359.12109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 358.9232482910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 359.4742126464844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 359.0348815917969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 358.83203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 358.7877197265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 359.01556396484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 359.0043029785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 358.9852600097656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 359.0928955078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 359.0246276855469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 358.9422607421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 358.9156188964844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 358.7737731933594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 358.9685974121094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.74798583984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 359.0298767089844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 358.7330322265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 359.4184875488281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 359.0014343261719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 358.92169189453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 359.0983581542969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.7872009277344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 358.6493835449219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.88507080078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.86358642578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 358.8348388671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 358.9035949707031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 358.9299011230469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 358.9601745605469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 358.9634094238281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 359.1622619628906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 359.0213928222656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 359.0063171386719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 358.888427734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 358.9368591308594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 359.1723327636719
############ Running episode number: 525  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.8383483886719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 358.84326171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 359.05078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 359.1793212890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 358.897216796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 358.8265686035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 358.9839172363281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 359.0367431640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 359.23309326171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 358.84161376953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 359.33489990234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 358.8168029785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 358.7654113769531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 359.1038513183594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 359.28717041015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 359.17041015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 359.015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 358.9234619140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 358.8841247558594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 358.9042663574219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 358.8695373535156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 358.92291259765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 359.1476135253906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 359.48223876953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 359.1282653808594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 359.1585388183594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 358.8697814941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 358.819091796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 358.9647521972656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 358.9122314453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 358.7733459472656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 358.8963928222656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 359.0105285644531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 358.9000244140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 358.95709228515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 358.80157470703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 358.94622802734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.80865478515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 358.8601989746094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 358.9872131347656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 359.09716796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 358.9886779785156
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 0 21.0 931.154858096 (10.624473674922116, 9)
loss 359.1640930175781
Current State,action,reward,Response time,Next State:  (9, 10.624473674922116) 3 20.0 979.256305105 (10.771376986314287, 10)
loss 359.1136474609375
Current State,action,reward,Response time,Next State:  (10, 10.771376986314287) 3 19.0 949.018321829 (10.924797168745895, 11)
loss 359.3705139160156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 358.8557434082031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 359.5791015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 359.0011901855469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 358.9552001953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 359.2025146484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 358.7939453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 358.99273681640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 359.1214904785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 359.126953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 358.9657287597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 358.8792419433594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 358.8719482421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 358.8284912109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 359.0899353027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 359.1058349609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 359.0163269042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 358.81646728515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 358.81414794921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 359.181640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 358.8650207519531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 358.7394104003906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 358.9161071777344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 358.6172180175781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 358.7642517089844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 358.937744140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 359.1356506347656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 359.00067138671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 359.9453430175781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 358.9766540527344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 359.10247802734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 358.8763732910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 358.76507568359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 359.07293701171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 359.009033203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 358.784912109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 359.0042724609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 358.8176574707031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 359.44989013671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 358.93218994140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 359.7178649902344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 359.02532958984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 358.7994689941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.7871398925781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.63116455078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 358.99908447265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 358.9201965332031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 358.74566650390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 359.32952880859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 359.4443664550781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 358.9081115722656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 358.9814758300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 358.8760070800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 358.8931884765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 358.8959655761719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 359.0207824707031
############ Running episode number: 526  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 359.4895324707031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 358.927734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 358.7431335449219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 358.9772644042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 358.7684326171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 358.89410400390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 358.94921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 359.7731018066406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 359.25115966796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 359.6394958496094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 358.8332214355469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 359.6658630371094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 358.83746337890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.8788146972656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 358.8298034667969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 358.8579406738281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 359.1387634277344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 360.0617980957031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 359.0160827636719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 358.9562072753906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 358.8439025878906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 358.9115295410156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 359.0678405761719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 359.52447509765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 358.8095703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 359.0695495605469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 359.10247802734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 359.5843200683594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 359.1846618652344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 358.9824523925781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 359.0025634765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 360.11737060546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 359.03436279296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 358.9870300292969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 358.9205627441406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 359.3571472167969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 359.0674133300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.79669189453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 358.8229675292969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 359.2787170410156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 358.9563293457031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 359.69427490234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 358.8612976074219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 359.7879943847656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 358.897216796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 358.8644104003906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 359.9365234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 359.0369873046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 359.05352783203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 359.7386169433594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 359.2661437988281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 358.9695739746094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 358.9181213378906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 358.7335205078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 359.4042053222656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 358.8438720703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 358.9316711425781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 359.1868591308594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 358.98193359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 359.0697937011719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 358.9371032714844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 358.8389587402344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 359.19561767578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 358.876953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 359.6968994140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 358.8817443847656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 359.3565979003906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 359.09771728515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 359.23272705078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 358.8027648925781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 359.04937744140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 358.91876220703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 359.1208801269531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 359.097900390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 358.6669006347656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 358.9502258300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 359.0389099121094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 358.77484130859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.9156188964844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 359.09771728515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 359.24267578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 358.888427734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 359.48626708984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 359.09039306640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 359.0056457519531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.9267578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 359.0279541015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.92791748046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.87738037109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 358.9091796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 358.9693603515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 359.27154541015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 359.0328063964844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 359.1283874511719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 359.1370849609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 358.98638916015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 358.9319763183594
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 4 19.0 1257.77263112 (16.845818065953559, 11)
loss 359.0315246582031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 359.0487060546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 359.0752258300781
############ Running episode number: 527  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 359.09552001953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 359.608642578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 358.8678283691406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 359.01690673828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 359.0430603027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 359.2227783203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 358.8822937011719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 358.69000244140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 359.19635009765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 359.84808349609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 358.7203674316406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 359.2201232910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 359.8597106933594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.7081604003906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 359.10833740234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 358.9477844238281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 358.9659729003906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 359.6162414550781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 359.0972595214844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 358.7066650390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 358.93048095703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 359.5188903808594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 358.70904541015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 359.04449462890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 359.2886962890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 359.88153076171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 358.83880615234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 358.8916320800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 358.8977966308594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 358.9945373535156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 359.0030822753906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 359.0844421386719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 359.08709716796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 359.4658508300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 358.7249755859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 358.8166809082031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 358.8889465332031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.9281005859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 359.0511474609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 358.9212341308594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 358.7399597167969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 358.9989318847656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 359.07232666015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 359.424560546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 359.46893310546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 358.9517822265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 358.7671203613281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 359.0997619628906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 358.9161376953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 358.9891357421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 358.8590393066406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 358.7630920410156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 359.4922180175781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 358.65142822265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 359.0592956542969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 359.9628601074219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 359.2191467285156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 359.85736083984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 359.3001403808594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 358.8798522949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 359.77825927734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 359.0911865234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 359.177001953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 359.1610412597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 358.8224182128906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 359.042724609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 358.9582824707031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 359.2780456542969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 358.9950866699219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 358.81103515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 358.97705078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 358.751953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 358.8277282714844
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 2 19.0 1376.52055872 (18.668181536495972, 11)
loss 359.3622741699219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 358.7747802734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 359.0304870605469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 359.0359191894531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 358.8061828613281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.7091064453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 358.81317138671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 358.75347900390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 359.3487243652344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 359.06427001953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 358.9888916015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 358.9095458984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.93963623046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 359.1457214355469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.5650634765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 359.0091552734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 359.3456115722656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 358.9798889160156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 359.3502502441406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 359.32012939453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 358.86468505859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 359.86846923828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 358.9056396484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 359.099609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 358.8551025390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 358.88677978515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 359.1642150878906
############ Running episode number: 528  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.7011413574219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 359.0081481933594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 358.9722900390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 358.98846435546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 359.0882568359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 360.0032043457031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 359.1826477050781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 358.74822998046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 359.1883850097656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 359.1260070800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 358.6790771484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 358.8282470703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 359.3014221191406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 359.3585205078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 358.9412841796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 358.85443115234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 358.9724426269531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 358.82391357421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 359.7034606933594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 358.8884582519531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 359.8592224121094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 359.1232604980469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 359.5470886230469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 358.70208740234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 359.04156494140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 359.0813903808594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 358.9588317871094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 358.7898254394531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 358.8564147949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 358.9274597167969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 358.89752197265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 359.0766296386719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 359.8963928222656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 358.8536376953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 358.96600341796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 359.4925231933594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 359.2199401855469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.95654296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 358.7920227050781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 358.91229248046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 358.9374694824219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 358.9012145996094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 359.12530517578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 359.1485595703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 360.24810791015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 359.10809326171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 359.0250549316406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 358.8377685546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 358.779296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 358.99951171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 359.38311767578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 358.8507080078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 358.78179931640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 359.11676025390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 358.9931945800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 359.11004638671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 359.0444641113281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 359.0491027832031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 360.1017150878906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 359.36627197265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 359.0101013183594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 359.0069885253906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 358.7962646484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 359.0614318847656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 358.9658508300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 359.0180969238281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 358.7057800292969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 359.33062744140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 359.2503356933594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 358.9234619140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 358.8077087402344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 358.932861328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 358.7297058105469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 358.8587341308594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 359.0104064941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 358.8628845214844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 358.9084777832031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 359.02032470703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 359.4063415527344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 358.8763122558594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 359.9084167480469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 359.6768493652344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 359.1414489746094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 358.8001403808594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 358.8520812988281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.85345458984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 359.0714416503906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 359.1658630371094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.8765563964844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 358.7831115722656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 358.87091064453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 359.31170654296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 359.0030822753906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 358.7811584472656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 359.119384765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 359.1180419921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 358.9309387207031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 358.7618103027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 358.82269287109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 358.7543640136719
############ Running episode number: 529  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.97552490234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 358.839111328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 358.86822509765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 359.56683349609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 358.8934020996094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 359.7822265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 359.03814697265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 359.7084045410156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 358.9400329589844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 359.1003112792969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 358.67138671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 358.8611145019531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 359.6886901855469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.97674560546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 358.9646301269531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 359.2430419921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 358.92767333984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 359.3032531738281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 359.3785400390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 358.936279296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 360.0952453613281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 359.20916748046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 358.9811096191406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 359.3972473144531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 359.3359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 360.1967468261719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 359.0603332519531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 358.6949768066406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 358.678955078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 358.9398193359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 358.9248962402344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 359.1657409667969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 359.3858337402344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 359.03411865234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 358.9868469238281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 359.00885009765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 359.3211364746094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 359.6533508300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 358.8938903808594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 359.01287841796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 358.89111328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 358.6515808105469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 359.1814270019531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 359.0281982421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 359.0997009277344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 358.8287353515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 358.82855224609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 359.05499267578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 359.02752685546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 358.94293212890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 358.9562072753906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 358.8597106933594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 359.0212097167969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 358.9502258300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 358.7166748046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 358.9069519042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 359.0513000488281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 359.3042907714844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 358.8913269042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 358.87371826171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 358.83209228515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 358.9716796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 358.9476013183594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 358.86932373046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 358.9789123535156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 359.02740478515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 358.85302734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 358.73785400390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 358.713623046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 359.0296630859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 359.1365966796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 358.937255859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 358.859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 360.1750183105469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 358.9593505859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 358.7565002441406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 358.9351806640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 358.74652099609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.7326965332031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 358.9123840332031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 359.0150451660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 358.6934509277344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 359.0567321777344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 358.8008728027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 358.9407653808594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 359.26190185546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 359.0301818847656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.8981018066406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 359.13653564453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 359.9450378417969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 359.0926513671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 358.83636474609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 358.708740234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 359.00714111328125
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 0 21.0 1225.69057988 (16.295120821876548, 9)
loss 359.0426025390625
Current State,action,reward,Response time,Next State:  (9, 16.295120821876548) 3 20.0 1276.0866986 (16.667936385136993, 10)
loss 358.92706298828125
Current State,action,reward,Response time,Next State:  (10, 16.667936385136993) 3 19.0 1261.79596106 (16.836383524612351, 11)
loss 359.2996826171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 359.01104736328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 358.7388000488281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 359.4383850097656
############ Running episode number: 530  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 359.01861572265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 359.1539001464844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 358.7988586425781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 359.2427062988281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 358.843017578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 359.1165466308594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 358.8132019042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 358.7734069824219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 358.9407958984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 361.03765869140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 358.735107421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 358.8392333984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 358.81732177734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.91259765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 358.9772033691406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 358.9225158691406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 359.333740234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 359.552978515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 359.01397705078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 358.8245544433594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 358.89166259765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 359.0061950683594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 358.9713134765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 359.16693115234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 359.1498107910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 359.6147766113281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 359.03045654296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 359.3466491699219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 359.0263977050781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 358.8218078613281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 359.0154113769531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 358.7079772949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 359.98480224609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 360.1582946777344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 358.85443115234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 358.9180908203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 359.0888977050781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 359.8407897949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 359.9480895996094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 358.8343811035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 358.859130859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 358.9096374511719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 359.05242919921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 360.4947509765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 359.47344970703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 358.8591003417969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 359.00616455078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 358.94757080078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 358.89813232421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 359.248291015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 358.8681640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 359.24383544921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 358.851318359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 359.85009765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 359.1060485839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 358.9685974121094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 358.9531555175781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 359.2767639160156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 359.5024108886719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 359.2283935546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 358.8356628417969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 359.9651184082031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 358.9764404296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 358.8557434082031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 359.4197082519531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 358.8155822753906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 358.978759765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 358.66058349609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 358.9097595214844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 358.9598388671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 358.937744140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 358.9288024902344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 360.1053466796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 359.25244140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 359.27349853515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 360.2986755371094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 358.82940673828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 359.091064453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.8500671386719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 359.8959045410156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 359.24407958984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 358.728515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 358.8953552246094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 359.07696533203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 358.8500671386719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 359.2659606933594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 358.823974609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.83099365234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 359.84197998046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 358.86151123046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 358.99786376953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 359.4178466796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 360.5930480957031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 359.0375671386719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 358.89447021484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 359.7769775390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 359.06585693359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 358.87908935546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 359.3799133300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 358.9403991699219
############ Running episode number: 531  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 359.06719970703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 359.27130126953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 358.9654846191406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 358.9721984863281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 359.0757141113281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 358.83160400390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 359.111328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 358.7662048339844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 358.8480529785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 358.94891357421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 358.94842529296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 358.86785888671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 359.67303466796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.5409240722656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 358.7351379394531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 358.9169921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 359.7373352050781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 359.0675964355469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 359.0093688964844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 358.9859313964844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 359.15423583984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 359.3149719238281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 359.79302978515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 359.3742370605469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 358.86181640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 358.886962890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 358.8866271972656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 359.07147216796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 358.93463134765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 360.0292663574219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 359.75543212890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 359.80523681640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 358.8697814941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 359.2547607421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 358.906005859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 359.14306640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 358.96795654296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 359.1106872558594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 360.162841796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 359.1957092285156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 359.2298889160156
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 2 19.0 925.376677007 (10.655373370049301, 11)
loss 358.7819519042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 359.2177734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 359.01361083984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 359.1650085449219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 358.65057373046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 358.76165771484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 359.2915344238281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 359.78009033203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 358.91680908203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 358.99505615234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 359.0555114746094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 358.7502136230469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 358.91802978515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 359.10870361328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 358.9613037109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 358.8291320800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 359.4046630859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 359.5899963378906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 359.3044128417969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 359.80352783203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 359.6878662109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 358.8339538574219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 358.69403076171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 359.7460021972656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 359.0021057128906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 359.13397216796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 359.1777038574219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 358.99212646484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 358.79461669921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 359.0378112792969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 358.80126953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 358.94158935546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 358.6968994140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 359.0742492675781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 359.28729248046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 358.7826843261719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 358.7474670410156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.8153076171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 358.9114990234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 359.1482849121094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 358.9229736328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 359.884033203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 359.3846435546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 359.2078552246094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 359.096923828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 359.4884338378906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.7349853515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 359.4648132324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 358.9816589355469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 358.9654541015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 359.3388366699219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 358.73382568359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 359.2025451660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 358.977783203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 359.2807922363281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 360.22052001953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 360.5059814453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 358.714599609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 359.9281005859375
############ Running episode number: 532  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 359.4657287597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 359.89208984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 359.0248718261719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 358.789306640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 358.9888916015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 359.76910400390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 359.8140563964844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 358.7776184082031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 359.0479431152344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 359.6370544433594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 358.7322082519531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 359.02203369140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 359.79931640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 360.2681579589844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 358.9866943359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 359.2843017578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 358.8779602050781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 359.04534912109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 359.21258544921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 359.2345886230469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 359.1228332519531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 359.1254577636719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 359.0668029785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 358.96441650390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 359.321044921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 359.0364685058594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 359.09295654296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 359.87628173828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 359.76739501953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 359.3436279296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 359.0518798828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 359.8196105957031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 358.8331298828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 359.3109436035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 359.1227111816406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 358.69354248046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 359.0976257324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.73406982421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 359.1333312988281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 358.7940979003906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 359.3610534667969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 359.97760009765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 358.98992919921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 359.40264892578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 358.92315673828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 358.96051025390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 359.0596618652344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 358.9771728515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 359.7004089355469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 358.8849792480469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 359.1703186035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 359.00665283203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 358.65313720703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 359.1585388183594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 359.8415222167969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 360.05535888671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 359.87750244140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 359.0047302246094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 359.0879211425781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 359.4312438964844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 358.8775634765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 358.8454895019531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 358.9189758300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 358.9518737792969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 358.7027282714844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 359.101318359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 359.45501708984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 359.75506591796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 360.18310546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 359.19134521484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 359.2226257324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 359.0296936035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 359.1831970214844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 358.86297607421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 359.1575012207031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 360.0632629394531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 359.2157287597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 359.9689636230469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 360.3065490722656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 358.81573486328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 359.3600158691406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 359.1011657714844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 359.1911926269531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 359.09698486328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 358.8301696777344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.9330749511719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 358.84521484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 359.02978515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 359.5784606933594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 358.81494140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 360.0491943359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 358.9075927734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 358.9974060058594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 358.77301025390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 358.8747863769531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 358.9323425292969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 358.87286376953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 359.2639465332031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 359.143310546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 359.1022033691406
############ Running episode number: 533  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.8523864746094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 359.00177001953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 358.8013610839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 359.0906066894531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 358.9266357421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 358.9967956542969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 358.7528381347656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 359.18865966796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 359.5119934082031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 358.7495422363281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 359.0837707519531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 359.1120910644531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 359.4726257324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 359.2357482910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 359.90814208984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 358.94134521484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 358.93463134765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 359.4699401855469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 359.26251220703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 358.82525634765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 358.8677978515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 358.84130859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 358.99072265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 359.0606689453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 358.7200927734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 358.82147216796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 359.8890380859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 358.8988037109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 359.1295166015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 359.8974609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 359.0570068359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 358.9631652832031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 358.83465576171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 359.18658447265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 359.1692810058594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 359.09747314453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 359.1059875488281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.9744567871094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 359.0995178222656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 359.7328796386719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 359.0364685058594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 359.22283935546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 359.22296142578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 358.95391845703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 359.0942077636719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 358.9337463378906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 359.69927978515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 358.78570556640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 359.0989074707031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 358.79705810546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 359.01715087890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 359.20556640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 359.0075378417969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 359.0841064453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 358.954345703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 358.8584899902344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 359.0709533691406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 358.8516845703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 359.762939453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 359.1184387207031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 359.89288330078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 359.02947998046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 359.05615234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 359.6219787597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 359.25555419921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 359.0525817871094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 359.1393127441406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 359.1676025390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 359.0370788574219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 359.6558837890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 358.99090576171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 358.9270324707031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 358.8315734863281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 358.81976318359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 359.0099792480469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 360.051513671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 358.6248779296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 359.4441223144531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.9124755859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 359.3653259277344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 359.05145263671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 358.9593505859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 360.3575744628906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 358.9754943847656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 359.0536193847656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 359.7437744140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 358.7596130371094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.7077331542969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.8085021972656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 359.26922607421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 358.9378662109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 358.9529113769531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 358.8651428222656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 359.162353515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 358.6296081542969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 359.08416748046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 359.3319396972656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 359.75189208984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 359.04156494140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 360.649658203125
############ Running episode number: 534  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.99969482421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 359.8736267089844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 359.7771911621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 358.8645935058594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 358.83660888671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 359.14398193359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 359.3970947265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 358.81787109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 359.7645263671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 358.831298828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 360.0318908691406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 359.25537109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 359.1690368652344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.9404602050781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 358.7703857421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 358.9353332519531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 358.7699279785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 359.4659118652344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 359.90960693359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 358.9746398925781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 360.7522277832031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 358.9294738769531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 358.76617431640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 358.8725280761719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 359.1466369628906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 358.8800964355469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 359.52960205078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 358.9376525878906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 358.8173522949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 359.18231201171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 360.1593322753906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 359.091064453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 359.3746337890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 359.0982971191406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 359.4534912109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 359.7353210449219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 358.8572692871094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 359.2181396484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 359.00750732421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 359.75042724609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 359.1755676269531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 358.89886474609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 360.41192626953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 359.0765380859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 359.91326904296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 358.9458312988281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 359.038330078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 359.0641174316406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 358.9741516113281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 359.0774841308594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 359.0185852050781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 358.9704895019531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 359.0596923828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 358.835693359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 359.0550231933594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 358.9911193847656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 358.9253845214844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 358.8033447265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 359.0436096191406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 359.1748046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 359.7317810058594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 358.8528137207031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 358.9326477050781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 359.0415344238281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 358.7239074707031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 359.07598876953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 359.4115295410156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 359.0921936035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 358.8808898925781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 358.7645263671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 359.7102966308594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 358.9134521484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 359.1677551269531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 358.8650817871094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 360.10736083984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 358.9551696777344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 358.7046813964844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 359.1078186035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 359.2153015136719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 358.8177795410156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 358.84564208984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 359.0124816894531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 359.82965087890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 359.243896484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 359.0138854980469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 359.0794372558594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 360.1379699707031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 359.9078674316406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 359.796630859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 359.1578063964844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 359.0516052246094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 359.282958984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 358.95098876953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 358.9070739746094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 358.8108825683594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 359.12664794921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 359.9288330078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 359.1484069824219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 359.2359313964844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 358.9226989746094
############ Running episode number: 535  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 359.0824890136719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 358.84722900390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 359.273193359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 359.0414123535156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 359.5552673339844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 359.9504089355469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 359.3450622558594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 359.1282653808594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 359.8103332519531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 358.5116882324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 358.9877624511719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 359.0467834472656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 359.16168212890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.6296081542969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 359.0296325683594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 359.6208801269531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 358.9421081542969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 359.37469482421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 359.0220031738281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 358.82183837890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 358.9515380859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 359.9079895019531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 358.8866271972656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 358.9239196777344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 358.7680969238281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 358.91827392578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 358.9705505371094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 358.82098388671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 359.6141662597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 359.36151123046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 358.8695373535156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 359.04962158203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 359.9850769042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 358.7669677734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 358.9809265136719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 358.9920654296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 359.3580322265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 359.7826843261719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 359.3334045410156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 359.245361328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 358.92816162109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 358.9305114746094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 359.30108642578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 358.9292297363281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 358.9261474609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 359.13739013671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 358.9333801269531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 359.0135498046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 359.092529296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 358.6849060058594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 359.0867004394531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 358.8206787109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 358.9130554199219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 359.2259521484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 358.7789611816406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 359.0002746582031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 359.79180908203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 359.0794372558594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 359.8590393066406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 359.8069763183594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 359.0306701660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 359.1084899902344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 358.9200439453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 358.94525146484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 358.9468688964844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 358.9531555175781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 358.9527282714844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 358.8667297363281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 358.91571044921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 358.7247314453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 358.8600769042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 358.909423828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 358.95062255859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 358.9623107910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 358.85400390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 359.09967041015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 358.9461975097656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 359.0476379394531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.9176330566406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 359.01055908203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 359.29510498046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 358.9277038574219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 358.81195068359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 358.7407531738281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 359.1291809082031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.8443603515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 358.90557861328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 359.1615295410156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 359.8949890136719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 358.75579833984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 359.01287841796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 359.20538330078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 358.7794189453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 359.09161376953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 359.0343322753906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 359.8237609863281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 358.8951416015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 359.236083984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 358.8443298339844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 359.62713623046875
############ Running episode number: 536  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 359.70587158203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 358.8423156738281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 359.15875244140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 359.0235900878906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 358.8270568847656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 358.9590148925781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 358.8875732421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 359.06707763671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 358.8821716308594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 358.7007751464844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 358.9958801269531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 358.8950500488281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 358.89508056640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 359.08428955078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 358.8796691894531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 358.7917175292969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 358.7272644042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 358.7832336425781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 358.9405212402344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 359.8204345703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 358.92974853515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 358.8789978027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 358.9676513671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 358.90283203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 359.19281005859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 360.1772155761719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 359.132080078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 359.0038757324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 358.994873046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 359.1452331542969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 358.96478271484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 358.85443115234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 358.9801330566406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 359.9206848144531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 359.2785339355469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 358.8388977050781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 359.0305480957031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.880615234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 359.08642578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 358.52520751953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 358.9921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 358.8193664550781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 358.7381896972656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 359.10528564453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 359.0932922363281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 359.7967834472656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 359.0819091796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 359.0216369628906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 359.9727783203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 359.0961608886719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 359.04254150390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 358.8006286621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 358.9823303222656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 358.7885437011719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 358.988525390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 358.8094482421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 358.9915771484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 358.93914794921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 359.1109924316406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 358.8997497558594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 359.0039978027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 359.06268310546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 358.8097839355469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 358.9872131347656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 359.97479248046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 359.6745300292969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 359.44793701171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 359.0660095214844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 359.4981689453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 359.70709228515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 359.86492919921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 358.750732421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 359.1510314941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 359.8337097167969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 358.9446716308594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 358.9793395996094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 358.9245300292969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 359.0813293457031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.9278259277344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 358.699951171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 359.09136962890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 359.0141906738281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 358.9554748535156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 359.009033203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 359.1966552734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.91595458984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 358.86767578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 359.0092468261719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 359.0500183105469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 360.1860656738281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 359.0637512207031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 359.0652160644531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 359.4013977050781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 358.92840576171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 358.91400146484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 358.9038391113281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 359.08258056640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 359.0429992675781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 359.1322937011719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 359.7464294433594
############ Running episode number: 537  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.9966735839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 358.8788146972656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 359.08917236328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 358.7865295410156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 359.7460021972656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 358.751953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 358.9816589355469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 360.16766357421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 359.67999267578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 358.6831359863281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 358.85595703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 358.906982421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 359.04852294921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 359.1308898925781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 358.9759521484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 359.1064453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 358.8956604003906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 358.9205627441406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 359.0693359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 358.8787536621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 358.85406494140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 358.7237854003906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 359.0559997558594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 358.7856140136719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 359.02703857421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 358.9801025390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 358.97393798828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 359.3988952636719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 359.7892150878906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 359.6346435546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 359.09197998046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 358.9104919433594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 359.07330322265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 358.9612731933594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 358.81793212890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 359.940185546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 359.0558166503906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.8868103027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 358.7726745605469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 359.13616943359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 358.8618469238281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 359.21197509765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 358.9267272949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 358.7683410644531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 359.7027587890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 358.9034423828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 358.9076232910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 359.0711669921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 359.0357666015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 358.9429626464844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 358.8958740234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 359.50750732421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 359.8432312011719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 359.74359130859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 360.017578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 359.06500244140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 358.9237060546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 358.91644287109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 358.91119384765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 358.8656921386719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 360.1473693847656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 358.9382019042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 359.0859069824219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 358.8790283203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 359.1131591796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 359.0416259765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 358.9609069824219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 359.11822509765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 359.2027587890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 359.0932312011719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 359.2183532714844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 359.0561218261719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 359.098876953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 358.9603271484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 359.073974609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 358.98822021484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 359.2742614746094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 358.94232177734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.9797668457031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 358.9668273925781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 358.9995422363281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 358.75396728515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 358.9038391113281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 358.8563232421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 359.0951843261719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 359.14422607421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 358.98663330078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.9089660644531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 359.0115661621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 358.9989013671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 359.0499572753906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 358.9088439941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 359.4850158691406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 358.9313049316406
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 1 20.0 1225.69057988 (16.295120821876548, 10)
loss 358.9454650878906
Current State,action,reward,Response time,Next State:  (10, 16.295120821876548) 3 19.0 1242.02029803 (16.667936385136993, 11)
loss 359.00006103515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 359.3488464355469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 359.2052917480469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 359.2557678222656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 359.4033508300781
############ Running episode number: 538  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.72772216796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 359.61236572265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 359.2681884765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 358.88775634765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 359.4585876464844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 359.67669677734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 359.32269287109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 360.3869934082031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 359.31317138671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 358.6703186035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 359.6999206542969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 358.7434387207031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 358.5632629394531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.83355712890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 358.9867248535156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 359.2615661621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 359.817626953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 359.0463562011719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 359.2937316894531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 358.8605041503906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 358.9512023925781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 359.0396728515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 359.1683044433594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 359.0188293457031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 358.79345703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 359.0979919433594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 358.7502136230469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 358.93499755859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 359.0001525878906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 359.3223571777344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 359.7666931152344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 358.7723693847656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 359.04193115234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 359.021484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 358.6692199707031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 359.2693786621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 359.1485900878906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.72467041015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 359.00555419921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 358.9600524902344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 358.9500427246094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 358.8543701171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 359.250732421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 359.0462646484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 358.9085998535156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 358.9522399902344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 359.12579345703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 358.853759765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 359.23309326171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 358.6751708984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 358.873291015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 359.1516418457031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 358.82012939453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 358.938720703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 359.2580871582031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 359.75823974609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 358.9803466796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 359.1791687011719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 358.9916076660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 358.79193115234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 358.7548522949219
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 0 21.0 1301.78496219 (17.944480812078613, 9)
loss 359.1201171875
Current State,action,reward,Response time,Next State:  (9, 17.944480812078613) 3 20.0 1362.4225545 (18.385807405229915, 10)
loss 358.946533203125
Current State,action,reward,Response time,Next State:  (10, 18.385807405229915) 3 19.0 1352.9188695 (18.671267839956315, 11)
loss 358.7929382324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 359.6827087402344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 359.3729553222656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 359.34490966796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 358.7857360839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 359.8539733886719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 359.6747131347656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 358.9975280761719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 358.8982238769531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 358.9955139160156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 360.3074035644531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 358.93853759765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 360.8857727050781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 358.8619689941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 359.05255126953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 359.3492736816406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 359.0574645996094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 358.9362487792969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 358.8177490234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 359.7585754394531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 358.79608154296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 359.07501220703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 359.0216979980469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 358.8944396972656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.87579345703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 359.0180969238281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 360.0648498535156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 359.2491760253906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 360.3478698730469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 359.1376953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 359.0232849121094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 358.8829650878906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 360.0244445800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 358.95867919921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 358.91949462890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 359.4364013671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 359.1971740722656
############ Running episode number: 539  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.81072998046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 358.82073974609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 359.12841796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 359.88916015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 358.9844665527344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 359.40948486328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 358.6429443359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 359.0089416503906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 359.76458740234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 358.84552001953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 360.3599853515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 359.3230895996094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 358.73956298828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 359.0157165527344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 358.92669677734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 359.28936767578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 358.9665222167969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 358.6781005859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 358.7788391113281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 358.8795166015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 358.8514404296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 359.4163818359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 358.7519836425781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 359.0934753417969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 358.9136962890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 359.3250427246094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 358.61175537109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 359.55096435546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 358.7661437988281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 358.7751770019531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 359.4413146972656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 359.7835693359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 358.9537048339844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 359.0260925292969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 358.7909851074219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 359.2108154296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 358.97430419921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 359.0211486816406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 358.77508544921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 359.66839599609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 359.8113098144531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 359.76849365234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 359.0916748046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 359.84490966796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 358.9176025390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 358.9033203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 359.04827880859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 359.870361328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 358.8616638183594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 358.8793640136719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 359.87786865234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 359.81195068359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 358.91015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 359.3520812988281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 358.9960632324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 361.0337829589844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 358.93096923828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 359.37353515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 358.97637939453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 359.01812744140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 358.8002014160156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 358.880615234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 359.41400146484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 359.20196533203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 358.8938903808594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 359.3315734863281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 358.9569091796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 358.85369873046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 359.24822998046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 358.8362121582031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 359.208740234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 359.2796630859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 359.4107360839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 358.77935791015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 359.099609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 359.1273193359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 358.99755859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 358.9357604980469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 359.2423400878906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 359.146484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 359.0306091308594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 359.55352783203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 359.2354736328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 358.99591064453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 358.8956604003906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 359.67529296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 359.1237487792969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 359.6702575683594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.9801330566406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 359.31256103515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 359.043701171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 359.9242248535156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 359.0720520019531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 359.1404724121094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 359.03790283203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 358.9033508300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 358.8581237792969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 359.2366943359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 359.7012939453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 359.3739318847656
############ Running episode number: 540  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.81903076171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 358.95733642578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 359.11761474609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 359.0023498535156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 359.181884765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 358.7591247558594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 359.07568359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 358.83355712890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 359.1346130371094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 358.9847717285156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 359.87078857421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 359.097900390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 359.0216064453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 359.301513671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 358.9654846191406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 359.2730712890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 358.9345397949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 358.99200439453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 359.2823791503906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 358.8433837890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 359.3621826171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 358.9601135253906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 358.8974609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 359.11676025390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 359.1566467285156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 359.0131530761719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 359.02264404296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 359.1325378417969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 359.0480041503906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 358.8292236328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 358.839599609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 359.02716064453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 358.92474365234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 358.8177490234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 359.02166748046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 358.9864196777344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 358.8326110839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 359.907470703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 359.06353759765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 359.3760986328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 359.0845031738281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 359.6329040527344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 359.38177490234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 359.0023498535156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 359.0122375488281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 359.15631103515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 358.93243408203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 359.4069519042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 359.0574951171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 358.8238220214844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 358.9300842285156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 359.3607177734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 358.82861328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 359.06756591796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 359.6664733886719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 358.9368591308594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 359.25421142578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 358.92120361328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 358.9531555175781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 359.38140869140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 359.1051330566406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 358.880126953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 358.9855041503906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 358.93524169921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 358.9743957519531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 358.784912109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 358.99267578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 358.901611328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 359.87811279296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 359.341552734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 359.33770751953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 359.627197265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 359.3203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 358.74017333984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 358.88616943359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 359.0173034667969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 358.95135498046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 358.8763122558594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.8141174316406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 358.7615966796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 359.14556884765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 358.9620361328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 358.8836975097656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 359.022705078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 358.93853759765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 359.06072998046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 358.8869934082031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 359.1499938964844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.68524169921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 359.66094970703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 359.2281494140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 358.8134460449219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 358.8551025390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 358.93768310546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 358.7650146484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 359.1806640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 359.2987365722656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 358.9755859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 360.4842529296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 358.6872253417969
############ Running episode number: 541  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.97479248046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 359.13934326171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 358.74884033203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 359.2137145996094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 358.9976501464844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 358.9762268066406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 358.7853088378906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 358.9903259277344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 358.9544982910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 359.4604797363281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 358.9788513183594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 358.9061279296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 358.9095458984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 359.1228942871094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 358.8642578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 358.94989013671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 358.7432556152344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 359.4779357910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 359.9994812011719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 359.23626708984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 358.7348937988281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 359.87750244140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 358.8164367675781
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 1 20.0 919.104778396 (10.388469398680568, 10)
loss 359.0848693847656
Current State,action,reward,Response time,Next State:  (10, 10.388469398680568) 3 19.0 928.707336523 (10.344006106602812, 11)
loss 358.8382873535156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 358.82220458984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 358.9739074707031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 358.8974304199219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 359.0123596191406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 358.82073974609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 358.83758544921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 359.1796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 359.0252685546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 359.4497375488281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 359.00518798828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 359.61175537109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 359.7224426269531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.9046325683594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 359.352294921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 359.5836486816406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 358.985595703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 359.0133361816406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 358.7004089355469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 358.8389587402344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 359.4123229980469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 358.9189758300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 359.28594970703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 359.0426940917969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 358.6322021484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 358.927978515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 359.0129089355469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 358.8798522949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 359.8326416015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 359.8665771484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 359.7030944824219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 359.2196044921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 359.4430236816406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 358.9844055175781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 360.2447814941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 358.927978515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 359.84552001953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 358.8774719238281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 359.1465148925781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 358.8116760253906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 359.7615966796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 359.0729675292969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 359.6400451660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 359.47064208984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 359.0056457519531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 359.0693664550781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 359.6418151855469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 359.3859558105469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 359.0357666015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 359.44073486328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 359.30572509765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 359.2466735839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 359.018310546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 359.39324951171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 359.4974060058594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 358.95892333984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 359.0996398925781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 358.75689697265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 358.9931945800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 359.15478515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 358.863037109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 359.2823486328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 358.6540832519531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 359.0527648925781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 359.1160583496094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 359.0663757324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 358.9690246582031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 358.9931945800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 359.2937927246094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 359.50079345703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 359.0579833984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 359.16168212890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 358.9095458984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 359.5966491699219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 359.0972900390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 359.4276123046875
############ Running episode number: 542  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.91180419921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 358.9885559082031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 359.304443359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 358.8050842285156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 358.9506530761719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 358.8002624511719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 358.7412109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 358.9721374511719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 359.0399475097656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 358.9029541015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 358.9893798828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 359.0267333984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 359.35369873046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.8611755371094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 359.792724609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 358.867431640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 359.01287841796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 359.1976623535156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 359.1539001464844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 359.0807800292969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 359.01446533203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 359.37164306640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 359.24853515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 359.47528076171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 359.4544982910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 359.06414794921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 359.1827697753906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 358.8469543457031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 359.7396240234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 358.7973937988281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 359.8534240722656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 358.859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 359.2895812988281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 359.07568359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 358.78814697265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 359.2680969238281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 358.922119140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 359.53863525390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 358.9913330078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 358.8415832519531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 359.3786926269531
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 4 19.0 925.376677007 (10.655373370049301, 11)
loss 358.9481506347656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 359.09698486328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 358.7721862792969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 358.9576721191406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 358.97662353515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 359.1023254394531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 358.8233337402344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 359.7304992675781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 358.8496398925781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 358.84649658203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 359.38775634765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 359.5461120605469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 359.0987548828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 358.8774108886719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 358.84307861328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 358.8278503417969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 358.8980712890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 359.1512451171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 358.8612060546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 358.9345397949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 359.087158203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 358.9857177734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 359.8660888671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 358.6677551269531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 358.998046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 358.91571044921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 358.78271484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 359.09033203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 358.7709655761719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 359.06976318359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 358.8507080078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 358.7574768066406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 359.050537109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 359.3633117675781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 359.8117370605469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 359.10162353515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 358.68170166015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 359.06036376953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 359.80841064453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 358.9617004394531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 358.802734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 358.7615661621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 359.45965576171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 359.3797302246094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.9959716796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 358.9042053222656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.7913513183594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.85150146484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 359.2119140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 358.9408264160156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 359.0121765136719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 359.3515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 359.28582763671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 359.8294372558594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 359.2979431152344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 358.9363098144531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 358.7914733886719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 359.0459289550781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 359.31427001953125
############ Running episode number: 543  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.9795837402344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 359.7408142089844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 358.7630310058594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 359.52105712890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 358.9702453613281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 359.0030212402344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 358.9240417480469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 359.10174560546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 358.9251403808594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 359.1664733886719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 358.843994140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 359.7122802734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 358.96917724609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.9126892089844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 359.52056884765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 359.0520935058594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 359.234619140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 359.8872985839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 358.87451171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 359.7089538574219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 358.9050598144531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 359.3416748046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 359.1978454589844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 358.9205627441406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 359.27117919921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 359.36712646484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 359.6980285644531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 358.9638977050781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 359.0391540527344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 358.8014221191406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 358.90289306640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 359.784423828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 358.98455810546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 359.55712890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 359.0523986816406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 358.8388366699219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 358.9388427734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 359.40289306640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 359.0291748046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 358.7799072265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 359.1600341796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 359.1098327636719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 359.2494201660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 358.9465637207031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 358.9589538574219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 358.8479919433594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 358.92889404296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 359.23321533203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 358.9273986816406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 358.8618469238281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 359.2274475097656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 359.117431640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 359.2903747558594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 359.3870544433594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 358.9311828613281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 359.2542724609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 358.90234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 359.13629150390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 359.1569519042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 358.86920166015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 358.7484436035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 358.8055419921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 359.1687316894531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 358.56109619140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 359.5902404785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 359.01641845703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 359.0289306640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 358.80804443359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 359.4979553222656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 358.960205078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 359.20623779296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 359.0102233886719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 359.9969787597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 359.1311340332031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 359.0311279296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 358.9181213378906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 358.83978271484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 359.873291015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.8174743652344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 358.8277282714844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 358.8218078613281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 358.8627624511719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 358.7311706542969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 358.86968994140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 359.28485107421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.6523132324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 358.76312255859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 359.69512939453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.9410095214844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 358.8921203613281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 359.09088134765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 359.0867614746094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 358.9853820800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 359.6193542480469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 358.9566650390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 358.9549560546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 359.32342529296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 359.3389892578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 359.5829162597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 358.9169006347656
############ Running episode number: 544  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.84552001953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 358.8564453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 359.3046569824219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 358.9104309082031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 358.7392272949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 359.1061706542969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 359.01531982421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 358.7979736328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 358.8357238769531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 359.09979248046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 358.9657287597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 359.1630859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 359.0917663574219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.76849365234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 358.9925231933594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 358.8216247558594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 358.95635986328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 359.11737060546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 358.91461181640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 358.8810119628906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 358.8705749511719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 359.2432556152344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 358.9707336425781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 359.2272644042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 358.82562255859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 358.9306335449219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 359.0242919921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 359.0470275878906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 359.719482421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 358.7398986816406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 358.9508972167969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 359.13348388671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 359.3332214355469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 359.367431640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 359.7685852050781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 359.6645202636719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 359.2065124511719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 359.2357482910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 358.943115234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 359.60003662109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 358.9946594238281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 358.7015380859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 358.70654296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 358.85235595703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 359.904296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 359.3230895996094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 359.955810546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 358.8229064941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 358.902099609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 359.08856201171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 358.7424011230469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 359.0338134765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 359.1582946777344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 359.36456298828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 359.13995361328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 360.1259460449219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 359.7811584472656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 359.7877197265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 358.9102783203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 359.03485107421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 359.2333679199219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 359.0298767089844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 359.0335693359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 358.9512939453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 358.6772766113281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 359.0574035644531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 359.33587646484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 358.91632080078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 359.55352783203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 360.17413330078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 359.3245849609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 359.04248046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 358.9613037109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 358.93359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 359.07061767578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 358.932861328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 359.0036926269531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 359.04620361328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 359.4170227050781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 358.9980773925781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 359.2393493652344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 358.946533203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 358.7498474121094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 358.6695861816406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 359.0887756347656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.5504455566406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 359.14825439453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 359.3355407714844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 359.0455322265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 359.1481628417969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 358.91888427734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 358.9835510253906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 358.9106750488281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 358.9949951171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 358.93084716796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 358.80816650390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 359.3302307128906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 359.4012451171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 358.85003662109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 358.66888427734375
############ Running episode number: 545  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.8819885253906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 358.9183349609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 359.2920227050781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 359.84295654296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 359.10443115234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 358.9774475097656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 359.2702941894531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 358.8995361328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 359.1162109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 360.40533447265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 359.4296569824219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 359.81585693359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 360.12896728515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.66363525390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 358.9489440917969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 358.86846923828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 358.7455749511719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 358.90130615234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 358.6744689941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 359.2637939453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 359.2064208984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 359.8657531738281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 359.3192138671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 358.8536376953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 359.12469482421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 358.9530944824219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 359.0713806152344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 358.5858154296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 358.8843078613281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 358.9288635253906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 358.7653503417969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 358.8177490234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 358.9019470214844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 359.3843994140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 358.5354919433594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 358.9097595214844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 359.0856628417969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 359.1701354980469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 358.88720703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 358.895751953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 358.9034423828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 358.88568115234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 359.0708923339844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 358.8934631347656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 358.8880310058594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 358.8510437011719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 359.1722717285156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 359.4600524902344
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 0 21.0 984.787563682 (11.819721938468785, 9)
loss 359.70257568359375
Current State,action,reward,Response time,Next State:  (9, 11.819721938468785) 3 20.0 1041.82165315 (12.19918626616789, 10)
loss 359.26324462890625
Current State,action,reward,Response time,Next State:  (10, 12.19918626616789) 3 19.0 1024.75516863 (12.501496275411796, 11)
loss 359.01763916015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 359.307373046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 358.583251953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 358.8746643066406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 358.954833984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 359.0391540527344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 359.42950439453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 358.7528076171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 358.96759033203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 358.937744140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 359.1423034667969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 358.9668884277344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 359.1127014160156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 358.8738098144531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 358.87445068359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 359.1300354003906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 359.0481262207031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 359.955322265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 358.9593200683594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 358.98486328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 358.77532958984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 359.6678771972656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 359.00311279296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 359.02203369140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 358.939697265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 359.20556640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 358.95233154296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 359.3829040527344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 359.2735595703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 359.70977783203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 358.7902526855469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 358.8390197753906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 358.8925476074219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 358.77117919921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 358.8414001464844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.7276306152344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 359.0446472167969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.78009033203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 359.05865478515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 358.8708190917969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 358.9056091308594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 359.8730163574219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 359.00054931640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 358.92724609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 359.40203857421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 358.8587951660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 358.8146667480469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 358.97833251953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 358.72393798828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 359.0746765136719
############ Running episode number: 546  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.7642822265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 358.7134704589844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 358.9908752441406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 360.2182312011719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 358.9307556152344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 359.14605712890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 358.8956604003906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 358.8180236816406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 359.32318115234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 359.85369873046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 359.0536804199219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 358.9205017089844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 359.905517578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 359.0602111816406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 359.687744140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 358.8298034667969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 360.25439453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 359.0289306640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 358.7206115722656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 359.8607177734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 359.8642883300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 359.0357360839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 360.033203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 359.15625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 358.836181640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 358.94854736328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 359.2049560546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 359.1182861328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 358.8524169921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 358.8318786621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 359.035400390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 359.19720458984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 358.8106384277344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 359.2620849609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 358.8695068359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 359.376708984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 358.8304138183594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.8833923339844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 358.8032531738281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 359.2167053222656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 358.84356689453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 359.2303161621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 358.667236328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 358.9001770019531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 360.13958740234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 359.7785339355469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 358.9144287109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 359.6196594238281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 359.97491455078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 358.98040771484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 359.0569763183594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 359.2340087890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 360.1193542480469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 359.09197998046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 358.83087158203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 359.01580810546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 359.7259521484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 358.9131164550781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 358.7734680175781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 359.2413330078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 359.80584716796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 359.19091796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 359.8082275390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 359.1414794921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 359.10650634765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 358.99969482421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 360.4292297363281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 358.71630859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 359.4471130371094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 359.52154541015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 358.9701232910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 360.8474426269531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 359.1270446777344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 359.0436096191406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 359.6456604003906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 359.30718994140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 359.88531494140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 358.8455810546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.66180419921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 358.9139709472656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 359.6007995605469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 359.268798828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 359.2080383300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 359.01806640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 359.343505859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 359.2479553222656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 358.90966796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 359.80157470703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.90911865234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 358.68914794921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 359.090087890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 359.30316162109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 358.56207275390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 358.6531066894531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 359.5158996582031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 359.0355529785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 359.6646423339844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 359.1558837890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 358.9411926269531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 358.9959716796875
############ Running episode number: 547  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 359.2185363769531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 359.2577209472656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 358.7325744628906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 358.802001953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 358.71771240234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 358.74072265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 359.2858581542969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 359.015380859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 358.9881591796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 359.03265380859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 358.9773864746094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 358.8872375488281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 358.971435546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.9577331542969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 358.900634765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 359.1695556640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 359.9271240234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 360.25677490234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 359.9161682128906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 359.31610107421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 360.28814697265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 359.0142822265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 358.9046936035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 359.9015808105469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 358.8780517578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 359.84832763671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 360.1560974121094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 359.26727294921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 359.0799255371094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 359.3144836425781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 359.9050598144531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 359.08673095703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 358.8634338378906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 359.0396728515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 359.3605651855469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 359.2845153808594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 359.676513671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 359.0673828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 359.4312438964844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 358.8222961425781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 358.6266784667969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 359.1848449707031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 360.2752685546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 359.1603088378906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 359.1566162109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 359.6423034667969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 359.6571044921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 359.6678161621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 358.8677062988281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 359.46612548828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 359.3869323730469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 358.7958984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 359.22705078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 359.8586120605469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 359.0140686035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 359.0244445800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 359.7591857910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 358.8231506347656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 359.38311767578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 359.28094482421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 359.48577880859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 359.65875244140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 359.9640808105469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 358.8991394042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 358.7948303222656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 358.7540283203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 358.65484619140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 359.3900146484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 360.2711181640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 358.8321228027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 358.61346435546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 359.4902648925781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 358.8739929199219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 359.64459228515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 358.98297119140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 360.1383972167969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 358.7433776855469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 358.76251220703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.774169921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 359.0982666015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 359.6333923339844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 359.1973571777344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 358.86810302734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 359.01678466796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 358.9840087890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.8595886230469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 359.42584228515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 359.2978210449219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.88739013671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 359.2960510253906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 359.43609619140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 359.13311767578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 359.07061767578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 359.4123229980469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 359.6717224121094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 359.4835205078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 358.8820495605469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 359.034912109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 359.01641845703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 359.23919677734375
############ Running episode number: 548  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 359.1570129394531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 359.08203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 358.8739013671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 358.9852294921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 359.0008850097656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 359.1749572753906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 359.1078796386719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 360.1748352050781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 359.2601318359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 358.64202880859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 358.8727111816406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 359.8009338378906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 358.97369384765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.7712707519531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 358.9341125488281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 358.76226806640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 358.9372253417969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 359.7837829589844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 358.92236328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 359.2736511230469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 359.0567626953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 359.4258728027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 358.8120422363281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 358.9793395996094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 359.0015563964844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 359.149169921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 358.9305419921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 358.86822509765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 358.989013671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 359.88397216796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 359.0747985839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 358.9364013671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 360.0019226074219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 360.0681457519531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 359.7080078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 359.7769775390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 359.0006408691406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 360.1529846191406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 359.0033874511719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 358.8227844238281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 359.35284423828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 361.40692138671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 358.8144226074219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 360.59783935546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 358.9064025878906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 359.0079650878906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 358.809814453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 359.68035888671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 358.703369140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 358.89788818359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 359.9019775390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 358.82537841796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 359.00836181640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 358.7566223144531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 358.8401794433594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 358.81329345703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 360.24774169921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 360.99462890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 358.9388732910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 359.3918762207031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 359.42633056640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 359.1048278808594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 359.30169677734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 358.81207275390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 358.91162109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 358.84637451171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 359.025146484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 358.6224670410156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 358.8329772949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 358.7452087402344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 358.96710205078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 358.925537109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 358.7771911621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 359.03277587890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 359.75592041015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 359.0310974121094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 358.960693359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 358.9132385253906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.9081726074219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 359.2097473144531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 359.72235107421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 359.3194885253906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 358.920166015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 358.9281921386719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 358.9091491699219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 359.0604553222656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 358.9186706542969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.7182922363281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 359.6299133300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 359.7723388671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 359.08660888671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 359.43255615234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 359.2166748046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 359.0793762207031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 358.98883056640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 358.8299560546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 359.7254638671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 359.3690490722656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 358.8380126953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 359.9196472167969
############ Running episode number: 549  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.820556640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 358.85107421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 358.7254943847656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 358.7450866699219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 359.0116271972656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 358.8464050292969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 358.9790954589844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 359.7945861816406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 358.9324035644531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 358.85723876953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 359.2135009765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 359.09832763671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 358.86993408203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.9107666015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 359.2992858886719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 358.909912109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 360.0086669921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 358.8517761230469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 359.2245788574219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 359.0712890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 358.8317565917969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 359.0323181152344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 358.9924621582031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 358.8404235839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 358.718017578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 359.0851135253906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 359.17315673828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 358.9881591796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 359.02154541015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 358.87554931640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 358.8979797363281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 358.9786682128906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 359.3417663574219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 358.96759033203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 359.26708984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 358.7991027832031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 358.9988708496094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 359.0544738769531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 358.90972900390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 358.71234130859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 359.13751220703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 359.0920104980469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 359.1991271972656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 358.81292724609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 358.81109619140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 359.1007385253906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 359.36199951171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 358.71270751953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 359.3439636230469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 358.89520263671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 358.9346923828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 358.6464538574219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 359.9172668457031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 358.9326171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 360.6439514160156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 359.704345703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 359.2023620605469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 358.79278564453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 358.7410583496094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 359.815185546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 358.8251647949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 360.2026062011719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 358.9927978515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 359.270263671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 358.9019470214844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 358.72869873046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 359.0189514160156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 359.0758972167969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 358.836181640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 358.8495178222656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 359.38238525390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 359.1789855957031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 359.0093688964844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 358.9444274902344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 358.9948425292969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 358.8966064453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 359.0156555175781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 359.0186767578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.96514892578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 359.24310302734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 358.7899169921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 358.9090881347656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 359.526123046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 359.11016845703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 359.0199890136719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.91632080078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 358.9793701171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 359.3570251464844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 359.8099060058594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 359.08282470703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 358.8432312011719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 359.8428955078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 359.9494934082031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 359.1169738769531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 359.93975830078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 359.1011962890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 359.20733642578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 358.80218505859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 358.9075927734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 359.8734436035156
############ Running episode number: 550  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.91839599609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 359.0186767578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 359.18853759765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 359.3706359863281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 358.89599609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 360.1958923339844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 358.9106750488281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 358.9533996582031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 358.9869689941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 358.7077941894531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 358.8251037597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 358.8395080566406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 358.931640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.7696533203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 359.2500305175781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 358.97882080078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 359.33770751953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 359.1927795410156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 358.8404541015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 359.4617919921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 358.77252197265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 359.0131530761719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 359.1391296386719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 358.97735595703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 358.8882751464844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 359.1365051269531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 358.860595703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 358.9944763183594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 358.7449645996094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 358.8422546386719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 358.9463195800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 358.9619140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 358.73974609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 359.1522521972656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 358.50128173828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 359.1942138671875
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 4 19.0 916.069372847 (10.316955310454549, 11)
loss 359.3189697265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.9393615722656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 359.04827880859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 358.8383483886719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 358.96197509765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 359.022705078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 359.42169189453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 358.840576171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 358.99981689453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 359.1804504394531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 359.0370788574219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 359.7149353027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 359.596435546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 359.1864013671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 359.4867248535156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 359.66888427734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 358.87786865234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 358.90582275390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 358.929443359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 358.88397216796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 359.030517578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 359.1094970703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 358.99774169921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 359.7750549316406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 359.0541687011719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 359.46649169921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 358.8104248046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 358.8534240722656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 359.3340148925781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 358.6936340332031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 358.840087890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 359.4017333984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 359.2881164550781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 359.1883850097656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 359.6102600097656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 358.9530334472656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 359.0172424316406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 359.7134704589844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 358.677001953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 359.2755432128906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 358.92047119140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 360.0171813964844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 359.2530517578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 359.0213928222656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 358.8479309082031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 358.89337158203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 359.41107177734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 358.69891357421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 358.8127746582031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.9927978515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 359.03277587890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 359.1688537597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 359.1121826171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 359.2898254394531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 358.9398498535156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 359.7602844238281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 358.8596496582031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 358.6210632324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 359.57879638671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 359.0423278808594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 358.931396484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 358.72808837890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 358.93939208984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 360.0853576660156
############ Running episode number: 551  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 359.037353515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 358.8568115234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 358.74359130859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 359.47332763671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 358.8702087402344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 359.2524108886719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 359.0871887207031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 358.8814392089844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 358.9588928222656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 359.6788024902344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 359.07666015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 359.0400085449219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 359.2375793457031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 359.0888977050781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 359.7527770996094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 358.93145751953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 359.0013427734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 358.775634765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 359.62994384765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 358.80615234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 358.9169921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 358.9076232910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 359.35430908203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 359.3926696777344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 358.7117004394531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 358.6696472167969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 359.00994873046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 358.8736877441406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 358.98272705078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 359.3702697753906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 359.0325012207031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 359.03094482421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 359.1770935058594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 358.9762268066406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 359.3655090332031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 358.8262634277344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 359.11669921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 359.2671203613281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 358.67333984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 359.0372314453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 358.98577880859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 359.0426330566406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 359.0574645996094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 358.7458190917969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 360.1178894042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 359.080078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 359.0184020996094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 358.9747314453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 358.8470153808594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 359.1597900390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 358.8223571777344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 358.7183532714844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 358.9709167480469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 359.026123046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 358.7991027832031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 358.8250427246094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 359.0445251464844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 358.88897705078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 359.0953369140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 359.0671691894531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 358.8711853027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 358.93511962890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 359.81329345703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 358.93951416015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 359.3310852050781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 359.02880859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 358.8345031738281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 358.890380859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 359.204345703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 358.9147644042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 359.00848388671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 358.9017333984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 358.94952392578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 358.9325866699219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 359.60943603515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 359.22418212890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 358.8011474609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 358.9471740722656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.7481384277344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 358.8771667480469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 358.880615234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 358.83941650390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 358.8650817871094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 358.95697021484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 358.88153076171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.6891174316406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 358.8301086425781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 359.06805419921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 359.12774658203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 358.96588134765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 359.0049743652344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 359.0054626464844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 359.3348388671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 359.756103515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 358.8995361328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 359.1763000488281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 358.8507385253906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 358.7066345214844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 358.87384033203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 359.74493408203125
############ Running episode number: 552  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 359.0779724121094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 359.0075378417969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 359.78466796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 358.92938232421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 358.70294189453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 358.668701171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 359.18017578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 359.45196533203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 358.91033935546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 359.8318786621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 358.7368469238281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 358.8469543457031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 359.0207824707031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.9065856933594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 358.95587158203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 359.01837158203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 358.79620361328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 358.67803955078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 359.2884826660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 358.89337158203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 359.9736328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 359.75592041015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 359.0335998535156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 358.9654846191406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 358.9164123535156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 358.957275390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 359.6085510253906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 359.1600646972656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 359.10235595703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 358.9364013671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 358.86370849609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 358.9933776855469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 359.8843994140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 358.89013671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 358.7717590332031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 359.82342529296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 358.7857360839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.91619873046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 358.8973388671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 359.1263427734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 359.6107482910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 359.0545349121094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 359.25152587890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 359.01617431640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 358.7574768066406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 359.8710021972656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 358.67608642578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 359.1433410644531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 359.0681457519531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 358.7706604003906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 359.8795471191406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 359.0068054199219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 359.0103454589844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 358.8033447265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 359.3829650878906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 358.7717590332031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 359.3667297363281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 359.11505126953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 358.9149475097656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 359.0333251953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 358.8672180175781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 359.0448303222656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 358.8895568847656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 359.31329345703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 359.0834655761719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 359.0650634765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 359.01495361328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 359.88873291015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 359.2104187011719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 358.91748046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 358.9686584472656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 358.9820556640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 358.9521484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 358.9539794921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 358.7615966796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 358.8005676269531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 359.3206787109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 358.8406066894531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 359.09796142578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 358.6561584472656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 358.72119140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 359.1322326660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 359.01312255859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 359.0542907714844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 358.7707214355469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.8721008300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 359.864013671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.81549072265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.8095397949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 359.076416015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 358.9750061035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 358.8273620605469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 358.7867431640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 358.9416809082031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 358.9593505859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 359.00457763671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 358.6871337890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 358.90032958984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 359.69207763671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 358.8456726074219
############ Running episode number: 553  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.7231140136719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 359.1601867675781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 358.9778747558594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 358.8769226074219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 359.12738037109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 358.9060363769531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 359.1095886230469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 359.2548522949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 359.37591552734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 359.0699462890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 359.076904296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 358.8532409667969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 359.13031005859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 359.1457214355469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 359.8769226074219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 358.9724426269531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 359.1736755371094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 359.066650390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 359.0093078613281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 358.7550964355469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 358.93719482421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 359.1324157714844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 359.8092346191406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 358.7275390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 359.0885314941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 359.0143127441406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 358.94415283203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 358.87384033203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 359.0893859863281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 358.7755126953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 358.89337158203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 358.8223571777344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 358.82373046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 358.9088439941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 359.1434326171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 358.9301452636719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 359.0641174316406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.8305358886719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 358.88714599609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 359.6422119140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 359.8280029296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 358.9938659667969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 358.9786682128906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 359.80133056640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 358.9073181152344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 359.0483093261719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 359.3323669433594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 359.83782958984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 359.8123779296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 358.8444519042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 359.8540954589844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 358.91363525390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 358.6981201171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 358.9161071777344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 358.93701171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 358.849609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 358.9825439453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 359.3028259277344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 359.0479736328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 359.02655029296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 359.0290832519531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 359.1518859863281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 358.87408447265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 358.9938049316406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 359.0211181640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 359.6363525390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 358.8976745605469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 358.94903564453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 359.00128173828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 359.1582946777344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 358.8404846191406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 359.0048828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 358.9588623046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 359.1429138183594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 359.0716552734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 359.1842041015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 358.6622619628906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 359.025146484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.8919677734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 358.863037109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 359.7145080566406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 358.8273010253906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 358.86578369140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 358.63043212890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 358.8492736816406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.820556640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 359.86328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.8032531738281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.9701232910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 359.7139587402344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 358.84625244140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 359.6936950683594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 358.9586486816406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 358.9186096191406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 358.98968505859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 358.8555603027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 359.0901794433594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 359.7503662109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 358.94793701171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 359.1132507324219
############ Running episode number: 554  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 359.25390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 358.8508605957031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 359.0549621582031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 359.0584411621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 359.2061462402344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 358.9376220703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 359.05059814453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 358.87908935546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 358.888916015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 358.9061584472656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 358.8537292480469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 358.93707275390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 359.2472839355469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 359.1265869140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 358.83587646484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 358.9681701660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 358.9261474609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 358.911865234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 359.2203674316406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 358.97552490234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 359.0277099609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 358.86248779296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 358.8885192871094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 359.0598449707031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 358.92657470703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 359.0144348144531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 358.8221740722656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 358.9037170410156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 359.029052734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 359.9823303222656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 358.8475036621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 359.0266418457031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 358.93182373046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 359.0548095703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 358.9554443359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 358.8483581542969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 358.9105224609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.9813232421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 359.04327392578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 358.9742126464844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 358.9203186035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 359.3491516113281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 358.94354248046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 359.73529052734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 359.0572204589844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 358.81005859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 359.0317687988281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 359.0205993652344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 359.0883483886719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 359.0058288574219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 359.0848388671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 358.8450927734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 358.9456787109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 359.61993408203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 359.1112060546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 359.7409973144531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 359.14361572265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 359.16009521484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 358.74407958984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 359.0402526855469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 359.2850036621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 359.49609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 358.97674560546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 358.95416259765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 358.7864685058594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 358.8020324707031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 358.74853515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 358.833740234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 358.9107666015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 359.6230773925781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 359.05963134765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 358.8701171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 358.8058166503906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 358.85284423828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 359.0308532714844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 359.24957275390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 358.80853271484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 358.8260803222656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 359.0424499511719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 358.6927490234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 358.9117736816406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 359.0643005371094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 358.7218017578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 359.3157958984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 359.0391845703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 359.7450256347656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 358.92559814453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 359.11480712890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 359.0069274902344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 358.8484802246094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 359.0284118652344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 359.167236328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 359.1120300292969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 358.8853759765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 359.2326965332031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 359.1181945800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 358.86199951171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 359.08203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 358.97320556640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 358.9423828125
############ Running episode number: 555  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.9256896972656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 358.83551025390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 359.10931396484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 358.947998046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 358.9638977050781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 359.0025329589844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 358.9361572265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 358.83935546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 359.1202392578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 359.0116882324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 359.1295471191406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 359.12701416015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 358.9423522949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.9646301269531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 358.90362548828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 358.8324890136719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 359.21746826171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 359.04931640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 359.2403869628906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 359.35882568359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 358.8511962890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 358.9222106933594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 359.1377258300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 359.8260803222656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 359.8451843261719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 358.92694091796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 359.0577697753906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 358.69561767578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 359.11865234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 359.0026550292969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 359.8952941894531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 359.0124816894531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 358.9224853515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 358.877685546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 358.8835754394531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 358.8921203613281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 359.3522644042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.8634948730469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 359.0501403808594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 358.79296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 358.9156494140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 358.81103515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 358.9819030761719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 358.9745788574219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 358.8966369628906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 359.0795593261719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 359.0037841796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 358.9031066894531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 359.0238342285156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 358.9896545410156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 359.1207580566406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 358.7976379394531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 359.0124816894531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 358.901123046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 358.9418640136719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 359.04632568359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 359.14849853515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 358.99163818359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 358.9026794433594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 359.1401672363281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 358.9552307128906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 358.9306640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 358.9084777832031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 359.04144287109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 359.07965087890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 358.9372253417969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 358.9393615722656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 358.9139709472656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 358.8454895019531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 359.04840087890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 358.9474792480469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 358.93756103515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 358.9658508300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 358.9790954589844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 358.9471130371094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 359.23883056640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 358.9008483886719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 358.8693542480469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.8569641113281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 359.1846008300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 358.9523010253906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 358.9006042480469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 358.82843017578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 358.8477783203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 358.95892333984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 359.0606994628906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 358.64202880859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 359.0586853027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.87347412109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 359.1473388671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 359.00262451171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 358.8779296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 358.9892883300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 358.9973449707031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 358.9679260253906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 358.8660888671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 358.8594970703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 358.93084716796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 358.9698486328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 358.7050476074219
############ Running episode number: 556  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.9032287597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 359.08544921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 358.9582824707031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 358.8707580566406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 358.91448974609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 359.0845947265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 359.2511291503906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 359.0401611328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 358.8822021484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 358.827392578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 358.9416809082031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 358.7905578613281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 358.85858154296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.7077941894531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 359.12896728515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 359.1728210449219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 358.8149719238281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 359.05377197265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 358.862548828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 358.8056945800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 358.654052734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 358.9649353027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 359.0932922363281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 358.90484619140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 359.03009033203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 358.98095703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 359.0289001464844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 358.7422180175781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 359.0622863769531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 358.9716491699219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 358.9718017578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 358.9508972167969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 358.96502685546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 359.02215576171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 358.9952392578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 358.8843994140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 358.9759521484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.7851867675781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 359.0008544921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 359.0517883300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 358.9527282714844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 359.03900146484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 358.9465637207031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 359.0038757324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 359.0189208984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 359.0435791015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 358.919189453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 358.9970397949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 358.7726745605469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 359.05694580078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 358.8603820800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 359.166259765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 359.25408935546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 359.06561279296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 358.9563293457031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 358.88214111328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 359.0412292480469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 358.9046325683594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 359.1163635253906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 359.005615234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 358.9051818847656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 359.072021484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 358.9891357421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 359.0475158691406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 359.0606994628906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 358.9286804199219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 358.97943115234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 358.7635192871094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 359.0482482910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 358.9866943359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 358.8332214355469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 358.9335632324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 358.8772277832031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 359.11993408203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 358.8686828613281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 358.803466796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 358.7916564941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 358.86767578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.84619140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 358.8931579589844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 358.88128662109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 358.7816467285156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 358.9983825683594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 358.8758544921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 358.8008117675781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 359.227294921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 358.9847717285156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 359.08441162109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 359.40899658203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 358.9770202636719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 359.07537841796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 358.7467956542969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 359.0806884765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 358.9749450683594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 358.99737548828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 358.88525390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 359.0703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 359.0021057128906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 358.9745178222656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 358.9127197265625
############ Running episode number: 557  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.9194641113281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 358.89208984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 358.66033935546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 359.1900634765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 359.02435302734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 359.1733093261719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 358.99700927734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 359.0365905761719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 358.7563171386719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 358.99078369140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 358.986572265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 359.06866455078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 358.75177001953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.86700439453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 358.8778991699219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 359.18182373046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 359.00738525390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 359.07196044921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 358.8283996582031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 359.1513366699219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 359.0018615722656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 358.9927978515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 359.0740661621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 358.9599304199219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 358.9321594238281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 358.92352294921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 359.0188903808594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 358.88824462890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 358.95294189453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 358.86883544921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 358.9336242675781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 358.91278076171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 358.90997314453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 358.9781494140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 358.76043701171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 359.2152404785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 358.9796142578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 359.0687255859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 358.89862060546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 358.60260009765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 358.9852600097656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 358.97418212890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 359.07696533203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 358.9689636230469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 358.9589538574219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 358.8786315917969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 359.0122985839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 358.7723083496094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 359.1387634277344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 358.8328552246094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 358.9872741699219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 359.0312194824219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 359.0831604003906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 358.8323669433594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 359.00457763671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 358.7111511230469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 358.95086669921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 358.9664306640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 358.8499450683594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 358.901123046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 358.94158935546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 358.9175720214844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 358.8716125488281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 358.888916015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 359.1211853027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 358.9892883300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 358.84234619140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 359.1963806152344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 358.7167663574219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 359.14599609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 358.85498046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 358.8594055175781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 358.8157043457031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 358.9958190917969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 358.95953369140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 358.91754150390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 359.0213317871094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 359.092529296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 359.0441589355469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 358.88372802734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 359.1950988769531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 358.80670166015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 358.9010925292969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 359.0664978027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 358.8968811035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 359.0002746582031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 359.2151794433594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.9953308105469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.9608459472656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 358.9075012207031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 358.97705078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 359.0039367675781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 358.9226989746094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 359.0090637207031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 358.7215270996094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 358.8047790527344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 358.7541809082031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 358.9084167480469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 358.9408874511719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 358.9410705566406
############ Running episode number: 558  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.9731140136719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 358.914794921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 358.9732666015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 358.954833984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 358.81195068359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 358.75994873046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 358.9410095214844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 358.8709716796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 358.90570068359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 359.0895690917969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 358.9499816894531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 359.1448059082031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 358.881103515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.9910583496094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 359.1401062011719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 359.00830078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 358.8245849609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 358.92303466796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 358.9568176269531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 358.93658447265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 358.8403625488281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 358.85113525390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 358.8208312988281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 358.9482116699219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 358.8779602050781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 358.837890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 358.75787353515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 359.1290588378906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 359.1332702636719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 358.8381042480469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 358.8774719238281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 358.87774658203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 358.9816589355469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 358.833740234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 358.8139343261719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 358.9056701660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 358.8995666503906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.6618957519531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 358.69940185546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 359.05908203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 358.9708557128906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 358.7903137207031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 359.1626892089844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 359.1215515136719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 358.994140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 358.84600830078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 359.0406799316406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 358.7502136230469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 358.8753662109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 359.08203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 358.7989807128906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 358.974365234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 359.0129089355469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 359.1590881347656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 359.0488586425781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 359.0657653808594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 359.0502014160156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 358.96295166015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 359.0816650390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 358.90081787109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 358.7312316894531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 358.8497009277344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 359.2604675292969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 358.99310302734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 359.1230773925781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 359.0613708496094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 358.8291320800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 358.9015808105469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 359.0235595703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 358.9615173339844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 359.0854187011719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 359.1114807128906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 358.9354553222656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 358.7370300292969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 358.9604797363281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 359.2328796386719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 358.99603271484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 358.8937683105469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.7646789550781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 359.04888916015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 358.8523254394531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 358.9415283203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 359.2062072753906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 359.0133361816406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 359.1250305175781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.9986877441406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 358.7580871582031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.983154296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.9346008300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 358.8309020996094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 358.9180603027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 358.920166015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 358.8343200683594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 359.2666931152344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 358.97991943359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 358.820556640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 358.92437744140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 358.9450988769531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 358.8978271484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 358.8867492675781
############ Running episode number: 559  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.8178405761719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 359.1882019042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 359.13629150390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 358.9552917480469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 358.95770263671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 358.7361755371094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 359.1222839355469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 358.7749938964844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 359.1664123535156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 359.049072265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 358.8264465332031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 358.8405456542969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 358.9264831542969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.8768615722656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 359.0050048828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 358.9746398925781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 358.7460021972656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 359.10589599609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 358.7867431640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 358.9521484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 358.9619445800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 358.8959655761719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 358.9971008300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 359.2024230957031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 358.9992980957031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 359.0926818847656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 359.1018981933594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 358.8221130371094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 358.9217529296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 358.792236328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 358.8116760253906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 358.955810546875
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 4 19.0 909.642131904 (10.276491935146446, 11)
loss 358.86383056640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 359.1512756347656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 358.94976806640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 358.843505859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 359.0858154296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.9757080078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 359.1053161621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 358.8780822753906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 359.052978515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 358.7892150878906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 359.0711669921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 359.12591552734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 358.9597473144531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 359.03961181640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 358.74322509765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 359.0191650390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 358.8997802734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 359.0561828613281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 358.93487548828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 359.0413513183594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 358.859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 358.8104248046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 358.7939758300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 359.0179748535156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 358.9488525390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 358.9996337890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 358.90643310546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 358.68743896484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 358.85455322265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 358.8318786621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 359.06097412109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 358.8377685546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 358.934814453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 359.1394958496094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 358.8648376464844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 359.0710144042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 359.2018127441406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 358.904052734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 358.8815002441406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 359.1551513671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 358.98638916015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 358.8030700683594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 359.0609436035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 358.9649963378906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 358.967529296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 358.9889831542969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.7406311035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 358.8963928222656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 358.7909851074219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 359.05572509765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 359.0175476074219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 359.0640869140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 358.9327392578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.9117736816406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 359.028564453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.87384033203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.8672790527344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 358.7620849609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 359.01953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 359.0218505859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 359.0840148925781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 358.75604248046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 358.99609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 358.871337890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 359.0898742675781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 358.9473571777344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 359.0118713378906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 358.8961181640625
############ Running episode number: 560  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.8009338378906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 358.842041015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 358.977783203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 358.9804382324219
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 4 19.0 974.154538113 (11.336751742492702, 11)
loss 358.92938232421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 358.9132995605469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 358.9578857421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 358.9423522949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 358.87579345703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 358.961669921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 358.8124084472656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 359.139404296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 358.9089050292969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.9852294921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 358.939697265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 358.9681701660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 358.8206787109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 358.92242431640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 359.0338134765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 358.8991394042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 359.11383056640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 359.1376953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 358.78302001953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 358.8671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 359.07427978515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 359.1418151855469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 358.97039794921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 358.94476318359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 358.79010009765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 359.1524658203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 358.9179992675781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 359.0983581542969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 359.0519714355469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 359.0223693847656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 359.04302978515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 358.97735595703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 358.9902038574219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.88885498046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 359.03839111328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 359.0224914550781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 358.8959655761719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 358.7123107910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 358.90264892578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 358.9063720703125
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 1 20.0 937.284736847 (10.924797168745895, 10)
loss 358.790771484375
Current State,action,reward,Response time,Next State:  (10, 10.924797168745895) 3 19.0 957.1563561 (11.039747673816453, 11)
loss 358.8270263671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 358.85589599609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 359.19171142578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 359.0495910644531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 359.0141296386719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 358.8480529785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 359.26239013671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 359.0849304199219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 358.9394226074219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 359.09722900390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 359.0263671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 359.2467346191406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 359.09295654296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 358.5899658203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 358.90594482421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 358.8205261230469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 358.9166259765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 359.0378723144531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 359.2276306152344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 358.7943115234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 359.0448913574219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 358.8448791503906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 359.1960144042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 359.0523376464844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 359.1690979003906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 358.94549560546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 359.03948974609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 359.10809326171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 359.22015380859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 358.8037109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 358.9068603515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 359.48089599609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 358.6827697753906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.9657287597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 359.32843017578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 358.8985595703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 359.0198669433594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 358.8560485839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 358.8187561035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 358.8874816894531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 359.1899108886719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 358.9004821777344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.96746826171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.79327392578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 358.81365966796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 358.9683837890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 358.838134765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 358.9351501464844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 359.001220703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 359.1787109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 358.9458923339844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 359.24822998046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 358.871337890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 358.8777770996094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 359.0585021972656
############ Running episode number: 561  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.756103515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 359.0494079589844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 358.95361328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 358.8980712890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 359.2108459472656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 358.90411376953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 358.89385986328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 358.8343811035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 358.6249694824219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 359.0869140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 358.7804870605469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 359.2502136230469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 358.8557434082031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.96734619140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 359.0240783691406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 358.84405517578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 359.0295715332031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 358.85833740234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 359.19012451171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 358.9677429199219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 359.10784912109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 359.0781555175781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 358.9670715332031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 359.02056884765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 359.24176025390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 358.8904724121094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 359.01495361328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 358.9942321777344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 358.7986755371094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 359.42156982421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 359.2155456542969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 358.7135009765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 358.887451171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 358.9811096191406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 358.9287109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 358.9984130859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 358.88983154296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.9737243652344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 359.0891418457031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 359.28009033203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 358.8802795410156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 358.8487548828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 358.9841613769531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 358.942138671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 358.8235168457031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 358.87884521484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 358.9136047363281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 358.76190185546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 359.06439208984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 358.8362121582031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 359.15234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 358.7452392578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 358.9071044921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 359.5205383300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 358.8390197753906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 359.0394592285156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 358.92559814453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 358.7370300292969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 359.0394592285156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 359.1019592285156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 359.0505065917969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 358.805908203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 358.88165283203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 359.31866455078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 358.9601135253906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 358.8072204589844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 358.8372802734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 359.03900146484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 359.09649658203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 358.6922607421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 359.3310241699219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 359.1438903808594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 359.0433044433594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 359.0050964355469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 359.0237121582031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 358.91668701171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 358.8807067871094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 359.1575927734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.9996032714844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 358.90081787109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 359.0242919921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 359.5928039550781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 358.9034118652344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 359.02349853515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 359.4647521972656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 359.0400085449219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 358.89801025390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.80548095703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 359.0431213378906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 358.9531555175781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 359.2272033691406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 358.83209228515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 358.7536926269531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 359.22821044921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 358.87847900390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 359.0604248046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 359.0414733886719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 359.01190185546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 358.93719482421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 358.9948425292969
############ Running episode number: 562  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 359.47540283203125
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 2 19.0 1000.80824137 (11.786394321941378, 11)
loss 359.2430419921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 359.1466064453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 359.25299072265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 358.957763671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 359.4001159667969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 359.0075988769531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 358.9266357421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 359.4647521972656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 358.9527893066406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 358.6548767089844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 359.4246826171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 359.0273132324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 359.0540466308594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 358.8446960449219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 358.87554931640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 359.00299072265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 359.16888427734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 359.07476806640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 359.35137939453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 358.8104553222656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 358.76788330078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 359.3671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 358.9188232421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 359.15655517578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 359.1310729980469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 359.0517883300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 358.8345031738281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 358.937744140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 359.0169982910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 358.7910461425781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 358.8971862792969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 358.9739685058594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 359.4189758300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 359.3167419433594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 359.00445556640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 358.9075622558594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.8726806640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 358.9537658691406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 358.96441650390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 358.8617248535156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 359.0032653808594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 358.8955078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 358.7533264160156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 358.96240234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 358.9874267578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 358.9200439453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 358.8548278808594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 359.1996154785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 359.1062927246094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 359.09832763671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 358.9186096191406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 358.9635009765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 358.9369812011719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 359.1009216308594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 359.1367492675781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 358.7858581542969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 359.295166015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 359.07568359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 359.1297912597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 359.0487060546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 359.2129211425781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 359.0052795410156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 358.79486083984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 359.2817687988281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 358.91845703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 358.92413330078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 358.964599609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 358.9991149902344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 358.84344482421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 359.1144104003906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 358.845703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 358.9613952636719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 359.2490234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 358.7779541015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 359.1429443359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 358.7612609863281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 358.9792175292969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.95208740234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 358.9615478515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 358.87445068359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 358.989013671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 358.9699401855469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 358.8579406738281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 359.4092102050781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.90802001953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 359.0545959472656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.8249206542969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.90875244140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 358.884033203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 359.1902160644531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 359.4456787109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 358.879638671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 359.0215759277344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 358.9756164550781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 358.9042663574219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 359.0542907714844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 359.09197998046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 359.0955810546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 358.92608642578125
############ Running episode number: 563  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.9563293457031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 359.4151611328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 358.8601989746094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 358.8383483886719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 358.95989990234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 358.69085693359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 358.8905029296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 358.9781494140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 359.0263671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 358.8221130371094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 359.21478271484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 358.7900085449219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 358.82379150390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.6736755371094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 358.94683837890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 359.0729064941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 359.04449462890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 359.4797668457031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 358.8244934082031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 358.96514892578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 359.11920166015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 358.95257568359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 359.55450439453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 358.9628601074219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 358.87384033203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 359.0334167480469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 359.1014709472656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 358.6064453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 358.9678649902344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 358.9358825683594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 358.9865417480469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 358.977783203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 358.9289855957031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 358.98187255859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 358.98980712890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 358.8784484863281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 358.9460144042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.88702392578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 358.87017822265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 359.0745544433594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 359.0699462890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 359.19232177734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 358.7825622558594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 358.6470031738281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 358.94732666015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 358.9879455566406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 358.9495544433594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 359.1246032714844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 359.0175476074219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 358.9836730957031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 358.9422302246094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 359.22088623046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 359.1190490722656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 359.614013671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 358.90802001953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 359.00506591796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 358.9886474609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 358.99029541015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 359.0804443359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 359.4068908691406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 359.10369873046875
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 4 19.0 1301.78496219 (17.944480812078613, 11)
loss 358.7927551269531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 359.0996398925781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 358.8283386230469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 358.994873046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 359.03521728515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 359.2540588378906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 358.726806640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 359.4545593261719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 359.0329284667969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 359.0760498046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 358.94732666015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 358.5495300292969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 359.13226318359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 358.8215637207031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 359.1912536621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 359.1517028808594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 358.97418212890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 359.31304931640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 358.94708251953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 358.86248779296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 359.329833984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 358.9369201660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 358.9576110839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 358.78857421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.9081115722656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 358.9840087890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 359.07275390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 359.3656005859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 358.8231506347656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 358.8320617675781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 358.9395751953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 359.16583251953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 359.28173828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 359.0324401855469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 358.88555908203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 358.9806213378906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 358.8421936035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 359.27752685546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 359.1238098144531
############ Running episode number: 564  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 359.386474609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 358.970703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 358.8731384277344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 358.9700622558594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 359.02911376953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 359.1011962890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 358.7461853027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 359.21160888671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 359.1517639160156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 359.4168395996094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 358.86688232421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 359.4278564453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 358.97308349609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.775146484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 358.9244384765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 358.9979553222656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 358.960205078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 359.0268249511719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 358.8951721191406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 358.98187255859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 359.0288391113281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 359.5785217285156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 358.78729248046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 359.47479248046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 358.9603271484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 358.86981201171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 358.9295654296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 359.0401611328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 359.01971435546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 358.9951171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 358.79742431640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 359.0595397949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 358.85858154296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 358.8968811035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 359.255615234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 358.8115539550781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 358.7915954589844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 359.0135192871094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 359.3960266113281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 358.94775390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 358.8869323730469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 359.05194091796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 358.9994812011719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 359.40478515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 358.8797912597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 358.8717041015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 358.8269958496094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 358.7983703613281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 359.14190673828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 359.13897705078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 358.9811096191406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 358.92919921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 358.90020751953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 358.951904296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 359.1081848144531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 358.80999755859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 358.8714294433594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 358.9858703613281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 358.89996337890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 358.9481506347656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 358.7849426269531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 359.0512390136719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 358.9529113769531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 359.0726318359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 358.83349609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 358.96319580078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 359.15240478515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 359.2213134765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 358.88031005859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 358.90399169921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 358.7987060546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 359.07080078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 358.7808837890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 359.1028137207031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 358.96783447265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 359.0345764160156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 358.7374572753906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 358.9736328125
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 1 20.0 1258.07554888 (16.237094554670044, 10)
loss 358.8970642089844
Current State,action,reward,Response time,Next State:  (10, 16.237094554670044) 3 19.0 1238.94234737 (15.950694610794756, 11)
loss 358.8375549316406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 358.79461669921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 358.9836730957031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 358.8539123535156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 358.9112243652344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 359.0468444824219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 359.3999328613281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 359.35980224609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.80731201171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 359.03662109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 359.00372314453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 358.77197265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 359.1312561035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 359.3739013671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 358.9062194824219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 359.369384765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 359.0027160644531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 358.9659118652344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 358.7569885253906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 358.97332763671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 359.0076599121094
############ Running episode number: 565  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.95587158203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 358.9283142089844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 358.8951416015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 359.0533142089844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 359.38116455078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 358.96966552734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 359.327880859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 358.85711669921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 359.4188232421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 358.9615173339844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 359.2289733886719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 358.9624938964844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 359.0414123535156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.87164306640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 358.9715576171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 359.0190124511719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 358.7757263183594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 359.0357971191406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 359.1012268066406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 359.209228515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 359.0540771484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 359.2160949707031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 359.01983642578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 358.8302001953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 358.9147033691406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 358.9151306152344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 358.989013671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 359.3315734863281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 358.9941711425781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 358.92181396484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 358.7418518066406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 359.3644104003906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 358.9161376953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 359.4541320800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 359.1178894042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 358.9621887207031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 358.7850036621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.77203369140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 358.84698486328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 359.5865173339844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 358.9966735839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 359.12799072265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 358.96038818359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 359.2175598144531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 358.8475036621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 359.2421569824219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 358.88165283203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 359.09466552734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 359.31414794921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 358.99005126953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 359.14495849609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 359.1050720214844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 359.31201171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 359.03521728515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 359.4911193847656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 359.29339599609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 358.8797302246094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 358.8518981933594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 359.0556640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 358.76959228515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 359.1371765136719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 359.20269775390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 358.9737548828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 359.0400085449219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 359.0268859863281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 359.05517578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 358.9805603027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 358.8962707519531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 359.39990234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 358.7207336425781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 358.9098815917969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 359.25762939453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 358.92266845703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 359.06634521484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 359.2282409667969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 359.0643005371094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 358.9053649902344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 358.9913024902344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.9890441894531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 359.138427734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 358.9646911621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 358.8782043457031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 358.72967529296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 358.966552734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 358.8809814453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 359.4996643066406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 359.0606994628906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.9564514160156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 359.0185546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 358.76519775390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 359.1354064941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 359.1125183105469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 359.0286560058594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 358.93463134765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 358.847900390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 358.8319091796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 358.9912414550781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 358.7998046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 359.0472412109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 359.3204650878906
############ Running episode number: 566  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.9419250488281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 359.3732604980469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 359.6266174316406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 359.13104248046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 359.05194091796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 358.888916015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 358.9941711425781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 359.03228759765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 358.8555908203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 359.7286071777344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 359.0068054199219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 359.3705139160156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 358.6377868652344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 359.0387878417969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 358.902587890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 359.4643249511719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 358.9614562988281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 359.43292236328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 359.1855163574219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 359.2958679199219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 358.8433837890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 359.3372802734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 359.0126953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 359.7298889160156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 358.8475036621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 358.91131591796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 358.9526062011719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 358.91156005859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 358.85699462890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 358.8726501464844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 358.763427734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 358.84112548828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 359.3597412109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 359.2674560546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 359.09234619140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 358.92620849609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 358.9168701171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.6792907714844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 359.4794006347656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 358.8319091796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 358.8279113769531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 359.00860595703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 358.7994384765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 358.85626220703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 359.32666015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 359.0019226074219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 359.04156494140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 358.9731750488281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 359.97491455078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 358.7823791503906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 359.07183837890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 358.9303894042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 359.6663818359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 358.77545166015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 358.9451904296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 358.9001770019531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 358.9262390136719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 358.8422546386719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 358.88006591796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 358.9600830078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 358.67462158203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 358.96954345703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 359.1016540527344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 359.0679016113281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 359.319580078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 358.9538269042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 358.8845520019531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 359.3240661621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 359.03533935546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 358.87432861328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 358.6392822265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 358.7862548828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 359.3082275390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 359.03936767578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 359.3462829589844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 358.8168029785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 359.6031494140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 359.04010009765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 359.0528869628906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 359.0285949707031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 358.6209411621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 358.85382080078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 359.0821533203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 358.9287414550781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 358.9646301269531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 359.49859619140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 358.9046630859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.6489562988281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 359.0042724609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 359.34808349609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 359.4879455566406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 358.9516906738281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 359.07537841796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 358.89337158203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 358.8831787109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 359.055908203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 358.8587341308594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 359.0486145019531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 358.95635986328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 358.985107421875
############ Running episode number: 567  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.7786560058594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 359.3924560546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 358.8487548828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 358.960693359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 359.41033935546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 358.9293518066406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 358.8574523925781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 358.650634765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 358.9906005859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 358.94384765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 359.3761291503906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 359.1214599609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 359.3949279785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.9947814941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 359.1719970703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 358.8164978027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 359.0599365234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 358.9155578613281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 359.08349609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 359.0810852050781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 359.3724060058594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 358.6811218261719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 359.2445373535156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 359.2854309082031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 358.8872985839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 358.67626953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 359.1402282714844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 359.3888244628906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 358.7779541015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 359.0705871582031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 358.93890380859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 359.1912536621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 359.0319519042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 358.8365173339844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 358.98321533203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 359.2427062988281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 358.9257507324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.80950927734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 359.0639343261719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 359.1610412597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 358.5748596191406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 359.0936584472656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 358.82391357421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 358.96563720703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 359.1651611328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 358.8739013671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 358.9049377441406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 359.2518310546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 359.32208251953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 358.6131591796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 358.850341796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 358.9844970703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 358.8046569824219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 359.2169189453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 359.03338623046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 358.724365234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 359.0513000488281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 359.2557678222656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 358.8226013183594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 358.9102478027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 359.00091552734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 358.7942810058594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 358.8255310058594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 358.7327575683594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 359.0622253417969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 358.9238586425781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 359.5249328613281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 358.8485412597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 359.63104248046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 359.51055908203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 358.8048400878906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 358.8371276855469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 358.830810546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 359.2540283203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 359.2906799316406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 359.0310974121094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 359.3255310058594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 359.040771484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 359.0462951660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 359.28765869140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 359.25225830078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 359.2188720703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 359.2547912597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 358.94854736328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 359.10162353515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.9601745605469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 358.8774108886719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 359.4783020019531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.86102294921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 358.7161560058594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 359.0597229003906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 358.85302734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 358.9762878417969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 358.8671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 359.0149841308594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 359.1068115234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 358.94293212890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 358.9898986816406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 359.00433349609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 358.8073425292969
############ Running episode number: 568  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 359.06268310546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 358.7334289550781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 359.00640869140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 358.98907470703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 359.4710388183594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 359.2123718261719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 359.0068664550781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 358.8325500488281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 358.8328857421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 359.5889892578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 358.8110046386719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 358.84710693359375
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 1 20.0 937.12351295 (10.772009508959538, 10)
loss 358.9962463378906
Current State,action,reward,Response time,Next State:  (10, 10.772009508959538) 3 19.0 949.051873418 (10.644925616761762, 11)
loss 359.31243896484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 359.0785217285156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 359.2147216796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 359.24017333984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 358.82843017578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 358.9612731933594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 359.3191833496094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 358.89581298828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 358.9864807128906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 359.0857238769531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 359.1627197265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 359.02105712890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 358.8656311035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 358.77874755859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 358.81329345703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 359.0684814453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 358.9147033691406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 359.016845703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 359.0322265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 358.98333740234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 359.4028625488281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 358.9727783203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 359.1504821777344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 359.52691650390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 359.44329833984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 358.960205078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 359.3150939941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 358.69293212890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 358.8534240722656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 358.99359130859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 358.83392333984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 358.8845520019531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 359.093505859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 358.88824462890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 358.9103088378906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 358.9057922363281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 359.3581237792969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 358.8736267089844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 359.2529296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 358.8211975097656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 358.85235595703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 358.91351318359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 358.9481201171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 358.8282775878906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 359.00213623046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 358.93548583984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 359.1544494628906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 358.86102294921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 359.3283386230469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 359.4277038574219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 359.29736328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 358.81805419921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 359.1654052734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 358.9570617675781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 358.98297119140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 359.2814636230469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 358.90216064453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 358.9725341796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 358.99957275390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 358.87335205078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 358.8194885253906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 359.2608642578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 358.91162109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 358.931640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 359.19842529296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 359.33294677734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 359.4273681640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 358.94158935546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 359.1890869140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 358.7958984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 359.39306640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 359.2735900878906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.9233093261719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 359.4050598144531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 359.1636047363281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.79150390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 358.9898681640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 358.6707458496094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 359.3479919433594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 359.3952331542969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 358.99676513671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 358.7452697753906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 358.8298645019531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 358.90447998046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 358.7870178222656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 359.31988525390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 358.98583984375
############ Running episode number: 569  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 359.4828186035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 358.8247985839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 359.30712890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 359.28546142578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 359.06903076171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 358.9610595703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 359.0915222167969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 358.932373046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 358.9579772949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 358.9956359863281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 359.28375244140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 359.0644226074219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 358.8925476074219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.8182678222656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 358.9722595214844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 358.955078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 359.0526123046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 359.5209655761719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 358.9827880859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 358.763427734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 358.8985290527344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 358.9578857421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 358.9664611816406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 359.4710693359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 359.3046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 359.24395751953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 358.9131774902344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 359.23773193359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 358.90380859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 358.91705322265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 359.0863952636719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 358.9623107910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 359.2220764160156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 359.33306884765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 358.96148681640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 358.9755554199219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 358.9476623535156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 359.3088073730469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 359.73028564453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 358.87054443359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 359.232421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 359.30120849609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 359.1072082519531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 359.208984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 359.3754577636719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 358.7480773925781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 359.64898681640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 359.0007629394531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 359.3456726074219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 359.109619140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 358.84014892578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 359.33721923828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 359.0224914550781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 359.4733581542969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 359.3351745605469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 359.1298828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 358.8027038574219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 358.8699645996094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 359.5957946777344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 359.0737609863281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 358.9223327636719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 359.392578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 359.3415222167969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 359.0496826171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 359.302490234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 359.3466491699219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 358.6024475097656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 358.9525146484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 359.11328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 358.84332275390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 358.8193054199219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 358.8599548339844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 358.95361328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 358.71844482421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 359.2380676269531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 358.8686218261719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 359.4896545410156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 359.34320068359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.9962463378906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 359.00836181640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 359.18304443359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 359.33123779296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 359.026123046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 358.92425537109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 359.2166442871094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 359.34027099609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 358.7241516113281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.7968444824219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.89581298828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 359.4600830078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 359.3526611328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 359.24688720703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 358.97381591796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 359.37646484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 359.094970703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 358.7427673339844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 358.9419860839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 358.859130859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 358.7330017089844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 359.6870422363281
############ Running episode number: 570  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.8694152832031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 359.0550842285156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 358.7923278808594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 359.2748718261719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 358.8909912109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 359.1462097167969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 358.8655700683594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 359.11370849609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 359.0060729980469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 359.2389831542969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 359.33441162109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 358.85052490234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 359.5224609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.89990234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 359.2995300292969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 358.91888427734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 358.8334045410156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 359.0696105957031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 359.1363525390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 359.0296325683594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 359.13140869140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 358.7021484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 358.8682861328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 359.25115966796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 358.82379150390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 359.76678466796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 359.3514709472656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 358.7889709472656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 359.1155700683594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 359.2924499511719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 358.9142761230469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 359.0602722167969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 358.9903564453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 358.8544616699219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 359.0928039550781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 358.8778076171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 359.15997314453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.9143371582031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 359.2155456542969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 359.53338623046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 358.849365234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 358.963623046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 359.0287780761719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 358.85272216796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 358.95660400390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 358.83349609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 359.0056457519531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 358.7450866699219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 358.8868408203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 359.258056640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 358.9465026855469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 358.8702087402344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 359.52764892578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 358.9884338378906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 359.067138671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 358.92022705078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 358.9923400878906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 359.0690612792969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 358.8898620605469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 359.0733947753906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 359.3854064941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 359.294921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 359.2457275390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 358.7249755859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 359.0416564941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 358.9454040527344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 358.7782287597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 358.8848876953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 358.9303283691406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 358.9811706542969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 358.9342346191406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 359.07696533203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 358.88751220703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 359.1196594238281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 358.807373046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 359.2262268066406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 358.94512939453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 359.1844177246094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.97833251953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 358.8275451660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 358.94537353515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 359.43414306640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 359.1664123535156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 359.4723815917969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 358.9625549316406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.9266357421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 358.84930419921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.9899597167969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.9883117675781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 359.0379638671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 359.029052734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 358.9543762207031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 358.8018798828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 358.8830871582031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 359.0394592285156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 359.09320068359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 358.759765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 359.009521484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 358.770751953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 359.0003356933594
############ Running episode number: 571  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 359.4784240722656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 358.94464111328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 358.9295959472656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 358.79913330078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 358.893798828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 358.7508239746094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 359.7929992675781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 358.6499938964844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 359.0357360839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 358.9190979003906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 359.1533508300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 358.79437255859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 358.87506103515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.91546630859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 358.9061279296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 359.6698303222656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 358.942626953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 358.6960144042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 358.96435546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 359.0250549316406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 359.40667724609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 359.0916748046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 359.0315246582031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 359.4015197753906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 359.3602600097656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 359.4576416015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 358.9136047363281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 359.1844482421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 358.902099609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 359.5212097167969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 359.4299011230469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 358.8299865722656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 358.8711853027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 358.8407897949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 359.134521484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 359.11981201171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 358.873779296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.8911437988281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 359.2972412109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 359.4678649902344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 359.0020446777344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 359.159423828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 359.37872314453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 358.8575134277344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 359.2081298828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 358.98297119140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 358.983642578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 359.08697509765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 358.9687805175781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 358.7335205078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 359.35174560546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 358.9464416503906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 359.35888671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 359.07391357421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 359.1827697753906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 358.9282531738281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 359.1668701171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 358.8403015136719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 359.1475830078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 358.6828308105469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 358.80615234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 359.40997314453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 359.54388427734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 359.40509033203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 358.9049377441406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 358.9911193847656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 359.20166015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 359.04461669921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 358.7645263671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 359.4124450683594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 358.6755065917969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 359.06927490234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 358.8409729003906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 358.69671630859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 358.7774658203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 358.9288024902344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 359.0709228515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 358.9083557128906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.8287658691406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 358.7590026855469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 358.9765930175781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 358.9007263183594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 359.2426452636719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 358.91888427734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 358.9512634277344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.79510498046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 359.0328674316406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 359.2913513183594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.9017333984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 358.94818115234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 358.99517822265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 359.0047912597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 358.7938232421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 358.9807434082031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 358.9986267089844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 359.0834655761719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 358.8001403808594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 358.8890380859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 359.0269775390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 359.01416015625
############ Running episode number: 572  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 359.22900390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 358.8214416503906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 358.8189392089844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 358.904296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 358.785400390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 359.09344482421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 359.0990295410156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 358.8697204589844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 359.0867614746094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 358.9310302734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 358.5360412597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 358.95123291015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 358.9818420410156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.9765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 359.5859069824219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 359.4375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 358.998291015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 358.85662841796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 359.3800354003906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 359.01226806640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 358.9190979003906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 359.30133056640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 358.8785095214844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 359.0203857421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 358.90032958984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 359.0413513183594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 359.2735900878906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 358.9197692871094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 359.1357421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 359.0980529785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 359.016357421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 358.62969970703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 358.9054870605469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 358.7934265136719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 359.1425476074219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 358.8924255371094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 358.8577575683594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 359.0163269042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 358.7347412109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 359.02923583984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 358.7398681640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 359.0954284667969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 358.811279296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 358.9040222167969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 358.95196533203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 358.7861633300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 359.14239501953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 359.4841003417969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 358.9667053222656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 359.11712646484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 358.6802062988281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 359.2760314941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 358.9546203613281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 359.069091796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 359.486328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 359.16339111328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 359.0363464355469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 358.9743957519531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 358.8382568359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 359.1576843261719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 358.7981262207031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 359.33697509765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 359.0932312011719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 358.8573303222656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 359.27032470703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 359.0276794433594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 358.91778564453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 359.0132751464844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 359.2364196777344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 358.8023681640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 359.46563720703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 358.8996276855469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 359.2345275878906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 358.9366760253906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 358.7066345214844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 359.1199035644531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 359.2527770996094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 358.90252685546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 359.08740234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 358.71966552734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 359.3268127441406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 358.7494812011719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 358.9603576660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 358.7841796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 359.69818115234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 359.09619140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 359.1755065917969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 359.0819396972656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 359.0282897949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 359.0850524902344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 359.0320129394531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 359.4620666503906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 358.85418701171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 358.8476257324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 359.0351867675781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 359.04290771484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 358.9209899902344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 358.9186096191406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 358.8709411621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 359.12908935546875
############ Running episode number: 573  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.9588623046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 359.1007385253906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 358.8643493652344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 358.866455078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 359.15386962890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 358.9137878417969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 359.3926086425781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 358.870849609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 359.10687255859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 358.97113037109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 358.7097473144531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 359.06292724609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 359.836669921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.96502685546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 358.7878723144531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 359.37921142578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 359.1043395996094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 359.15283203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 358.9357604980469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 358.7702941894531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 358.7713317871094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 358.9100341796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 359.1961669921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 359.4937744140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 358.7120666503906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 358.87249755859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 358.87884521484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 359.0416564941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 358.8748474121094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 358.62567138671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 359.3580322265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 358.74383544921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 359.05816650390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 358.9298095703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 359.2706604003906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 358.8591003417969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 358.9124450683594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 359.14794921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 359.08270263671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 358.82196044921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 359.1190490722656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 359.15264892578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 358.9149475097656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 358.99114990234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 359.0173645019531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 359.43060302734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 359.65435791015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 358.9332275390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 359.0823669433594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 359.16748046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 358.7042236328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 358.6983642578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 358.8953857421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 358.81414794921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 358.83001708984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 358.8412170410156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 359.0498352050781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 358.8123779296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 359.0913391113281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 359.0000305175781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 358.843994140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 358.9383239746094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 358.9032287597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 359.42486572265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 359.0216979980469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 358.7327880859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 359.3800354003906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 359.1139221191406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 359.1370544433594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 358.65325927734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 359.00885009765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 358.8724670410156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 359.26171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 358.8990478515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 359.0940856933594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 359.09613037109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 358.9152526855469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 359.1214599609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.8546142578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 359.1070861816406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 358.925048828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 359.0554504394531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 359.0896301269531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 358.8563537597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 358.92791748046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.9471740722656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 359.48150634765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 359.0576477050781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.6339416503906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 358.7008361816406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 358.7405090332031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 358.76068115234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 358.9618225097656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 358.8641662597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 358.7818603515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 358.86651611328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 358.7670593261719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 359.2747802734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 358.9347839355469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 359.0412902832031
############ Running episode number: 574  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.8168029785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 359.1578369140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 358.9342346191406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 358.8433532714844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 359.0364074707031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 359.463623046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 359.085205078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 358.8512878417969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 358.9332275390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 358.91864013671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 358.9057922363281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 358.8851318359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 358.9884338378906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.92596435546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 359.0152893066406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 358.62847900390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 359.1318359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 358.7109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 358.9046630859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 358.9483642578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 358.95623779296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 359.0357666015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 359.0400695800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 359.00103759765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 359.38311767578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 359.0044860839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 358.9669494628906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 358.8514099121094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 359.1252746582031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 359.34466552734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 358.8802490234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 358.8719787597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 358.8951110839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 359.0140686035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 359.2376708984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 358.912109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 359.2060852050781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.9615783691406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 358.9897766113281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 358.9031066894531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 358.87640380859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 358.9949645996094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 359.0434265136719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 359.4683532714844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 359.03668212890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 359.0238952636719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 359.4374694824219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 358.8081359863281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 359.02020263671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 359.1612548828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 358.9809265136719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 359.03216552734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 358.8349609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 358.91864013671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 358.92816162109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 358.9385070800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 359.3763122558594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 358.8746643066406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 358.9827880859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 359.17864990234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 358.9614562988281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 358.8614196777344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 358.71661376953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 359.00799560546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 359.1191711425781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 359.3488464355469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 358.72802734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 359.0086669921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 358.880615234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 358.8536071777344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 358.900146484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 359.14794921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 358.9180603027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 359.2069091796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 358.68951416015625
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 1 20.0 1339.12370397 (17.82724819986867, 10)
loss 359.3495178222656
Current State,action,reward,Response time,Next State:  (10, 17.82724819986867) 3 19.0 1323.29060362 (17.229782241685768, 11)
loss 359.3144836425781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 359.5299072265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.73388671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 358.74371337890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 358.8753662109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 358.93914794921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 359.1195068359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 358.6778869628906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 359.0538635253906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.8356018066406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 359.1129455566406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 359.1531982421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.898681640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 358.865234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 358.9359436035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 358.6912536621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 358.9071350097656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 358.97998046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 359.1468200683594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 358.83843994140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 359.1213684082031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 359.23333740234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 358.9245910644531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 358.8639831542969
############ Running episode number: 575  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 359.6281433105469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 359.5692443847656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 358.99688720703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 358.9844055175781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 359.2374267578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 358.91265869140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 358.8177490234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 359.18914794921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 359.02166748046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 358.8950500488281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 359.816162109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 358.9381408691406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 358.9325866699219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 359.0574645996094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 359.03839111328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 359.0913391113281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 358.9062194824219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 358.941650390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 358.9700012207031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 359.0755615234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 359.44232177734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 359.85235595703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 359.2093200683594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 359.05780029296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 359.0580139160156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 358.975830078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 359.26177978515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 358.9646301269531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 359.4048156738281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 358.8940734863281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 358.9168395996094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 358.92230224609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 359.2369689941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 358.9827575683594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 358.96624755859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 359.4481506347656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 358.947021484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.76483154296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 358.8900451660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 359.1499328613281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 359.0063781738281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 359.29766845703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 359.2206726074219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 359.2936706542969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 358.7319030761719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 358.89935302734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 358.86651611328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 359.02093505859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 358.92083740234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 359.3387756347656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 359.02850341796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 358.78936767578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 358.8407897949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 359.2544860839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 358.7650451660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 358.8415222167969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 359.1341247558594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 358.8138732910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 359.03778076171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 358.8501281738281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 359.0375671386719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 359.0647888183594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 358.9803771972656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 359.5081787109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 359.25469970703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 358.9437561035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 359.31170654296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 359.1977844238281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 358.7273864746094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 359.097900390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 358.99859619140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 359.09979248046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 359.0963134765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 359.1158447265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 359.0149230957031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 359.04779052734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 358.98956298828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 359.4064025878906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 359.0797119140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 358.9187927246094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 358.8217468261719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 359.0621337890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 359.1803894042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 359.0894775390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 359.1015930175781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.8857116699219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 358.9669494628906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.8511657714844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.93505859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 358.8585510253906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 358.7735900878906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 358.9100341796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 358.8916015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 359.56976318359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 359.09063720703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 358.7349548339844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 358.946044921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 359.0683288574219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 359.0391540527344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 359.0887451171875
############ Running episode number: 576  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.8641052246094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 358.96026611328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 358.7778015136719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 359.0978698730469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 359.0412902832031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 359.26025390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 359.0222473144531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 358.9022216796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 359.02703857421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 359.1309509277344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 359.51531982421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 359.03887939453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 358.9716796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.99090576171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 358.9678039550781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 358.9668273925781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 358.82879638671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 359.0552062988281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 358.97039794921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 358.78564453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 359.5263671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 358.63287353515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 359.05224609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 358.9115295410156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 358.7713928222656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 358.97637939453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 359.0701904296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 358.9839782714844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 359.0954895019531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 358.9656982421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 359.0232238769531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 359.0022888183594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 359.12225341796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 358.94189453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 358.8346252441406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 358.9670715332031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 358.92913818359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 359.1219177246094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 358.96221923828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 358.85015869140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 358.80712890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 359.0743408203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 358.85992431640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 358.7875061035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 358.92144775390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 358.94195556640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 358.8724365234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 358.9611511230469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 358.80731201171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 358.9173583984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 358.84857177734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 358.9886779785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 358.8837585449219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 359.1257019042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 358.94781494140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 359.3676452636719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 358.8890686035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 358.71893310546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 359.18243408203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 358.92230224609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 359.1539611816406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 358.75408935546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 359.02386474609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 359.5271301269531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 359.1419677734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 358.87066650390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 359.0553283691406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 359.09796142578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 358.8140563964844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 359.084228515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 358.9394226074219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 358.9510498046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 358.811279296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 358.7605895996094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 359.08428955078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 358.8556823730469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 358.76190185546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 359.190673828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.9464416503906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 358.92596435546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 359.3476867675781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 359.2237854003906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 359.38153076171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 358.7982177734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 358.8589172363281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.8554992675781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 358.9504089355469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 359.01788330078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.88818359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 359.09027099609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 358.8873291015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 359.6064758300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 359.2062072753906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 359.4667053222656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 358.8623046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 358.86102294921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 358.916748046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 358.8591003417969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 359.12689208984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 358.9067687988281
############ Running episode number: 577  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.96942138671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 358.9191589355469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 359.6918029785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 359.1597900390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 359.3244323730469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 359.1010437011719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 358.66162109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 358.96600341796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 359.50299072265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 359.07403564453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 358.8384704589844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 359.3805847167969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 359.2271728515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.7998962402344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 358.864501953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 358.83087158203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 359.0021667480469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 359.45599365234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 358.965576171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 359.03924560546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 359.3091125488281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 359.14483642578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 358.9954528808594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 358.78826904296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 359.0357666015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 359.04248046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 359.35711669921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 358.9124450683594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 359.0005187988281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 359.2712707519531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 358.9100036621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 359.1269226074219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 359.36029052734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 359.15032958984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 358.9068908691406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 358.7898254394531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 358.8498840332031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 359.0169372558594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 359.01226806640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 359.0234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 359.3504638671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 359.0008850097656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 358.7193908691406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 358.9864807128906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 358.8829650878906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 359.1321716308594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 358.9078063964844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 359.03814697265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 359.0107116699219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 358.94610595703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 359.3540954589844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 359.6386413574219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 358.9975891113281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 359.194580078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 359.0036926269531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 358.6618347167969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 359.44525146484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 358.8216247558594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 358.91424560546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 359.0650939941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 359.7412109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 358.7320556640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 359.17864990234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 358.93768310546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 359.0530090332031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 359.0954895019531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 359.3359069824219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 359.1068115234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 359.0798034667969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 358.90313720703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 359.25299072265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 359.4267883300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 359.09295654296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 358.8841247558594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 359.0389099121094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 359.0023193359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 359.0165100097656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 359.0746154785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.9080505371094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 358.9010925292969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 359.27899169921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 358.715087890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 359.38787841796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 358.9832763671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 358.84716796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.7753601074219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 358.9474182128906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.86859130859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 359.1463623046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 358.8988952636719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 359.0719299316406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 358.86737060546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 359.4281005859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 358.755615234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 359.0325622558594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 359.0647277832031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 359.4440612792969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 359.34759521484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 359.3485107421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 359.31634521484375
############ Running episode number: 578  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.8916931152344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 358.9674987792969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 358.8959655761719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 358.96380615234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 358.9692077636719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 359.0699462890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 358.9786071777344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 358.926513671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 358.754638671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 359.36444091796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 359.05914306640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 358.9205627441406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 358.9750061035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.8891296386719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 358.9016418457031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 358.6968078613281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 359.134765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 358.9505615234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 358.8763732910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 358.815673828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 359.42169189453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 358.8531799316406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 359.475341796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 358.9612731933594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 358.71807861328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 359.3310241699219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 359.2888488769531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 359.3587341308594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 359.2143249511719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 359.13616943359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 359.088623046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 359.05364990234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 358.94775390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 359.1011657714844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 359.3160705566406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 358.9365539550781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 358.99078369140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.90631103515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 359.124755859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 358.8576965332031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 358.97601318359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 358.8418884277344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 358.8509521484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 359.0425720214844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 358.86541748046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 358.7313232421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 359.0748596191406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 358.822265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 358.8251647949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 359.27490234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 359.1288757324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 358.8876647949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 359.1053771972656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 358.994384765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 359.22930908203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 358.9851379394531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 358.7829284667969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 359.057373046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 359.03509521484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 358.9167175292969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 359.0635681152344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 359.00726318359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 358.9881896972656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 359.0749206542969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 359.3727722167969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 359.00579833984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 358.7384948730469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 358.90557861328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 359.1490173339844
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 1 20.0 1379.54110953 (19.385636054792762, 10)
loss 359.0482482910156
Current State,action,reward,Response time,Next State:  (10, 19.385636054792762) 3 19.0 1405.95387237 (19.223969507401588, 11)
loss 359.65618896484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 358.9317321777344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 359.0704345703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 358.9906311035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 359.01519775390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 359.01220703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 359.07049560546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 359.0130310058594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 359.12347412109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 359.28009033203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 358.9778137207031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 358.8775329589844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 358.7729797363281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 358.97784423828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 359.01593017578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 359.44818115234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 358.8905334472656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 359.7511901855469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.9365539550781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 359.1605224609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 359.0055847167969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 358.89434814453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 358.7163391113281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 358.6914367675781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 359.44635009765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 359.3318786621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 359.15228271484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 359.6636047363281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 359.0087585449219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 359.3798828125
############ Running episode number: 579  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.9461669921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 358.9744567871094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 358.88360595703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 359.02978515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 358.81597900390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 359.42193603515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 358.9493103027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 359.048583984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 359.0471496582031
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 4 19.0 945.729803224 (10.816918347608043, 11)
loss 359.3640441894531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 358.7342529296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 359.05633544921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 358.8860168457031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.8713684082031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 359.1516418457031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 359.0413818359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 359.3094482421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 359.02197265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 359.38433837890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 359.2503967285156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 358.8160095214844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 358.8497314453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 358.6685485839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 359.2840881347656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 359.0019226074219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 359.20880126953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 358.90826416015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 359.0666809082031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 358.9184875488281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 359.1472473144531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 359.4557189941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 359.19586181640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 358.8733825683594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 358.92657470703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 358.9767150878906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 359.0380859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 359.4952697753906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 359.1776428222656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 358.9393310546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 359.0125427246094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 359.1407165527344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 359.427734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 358.8666687011719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 358.6662902832031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 358.7515563964844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 358.9161376953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 358.7312927246094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 358.8712158203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 358.8548889160156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 358.750732421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 358.8443603515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 358.93585205078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 359.2378234863281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 359.51275634765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 359.40704345703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 358.9631652832031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 359.2499694824219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 358.82183837890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 358.7208557128906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 358.7420654296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 358.8101501464844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 359.0839538574219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 358.9438171386719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 359.04547119140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 358.8379821777344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 359.3977966308594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 358.9972839355469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 359.17266845703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 358.75347900390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 358.7734680175781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 358.9001770019531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 358.8761901855469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 359.143798828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 358.8883972167969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 358.7669677734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 358.6990661621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 358.9057312011719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 359.1975402832031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.84466552734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 358.51373291015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 359.4715270996094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 358.9605407714844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 359.21527099609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 358.9091796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 358.9039306640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.96673583984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 358.986083984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.67608642578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.850830078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 358.9652404785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 358.9156799316406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 359.22833251953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 358.7839050292969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 358.9473876953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 358.8948059082031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 358.8935852050781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 359.2859191894531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 358.9433288574219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 359.13250732421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 358.92340087890625
############ Running episode number: 580  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.85186767578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 359.02679443359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 358.7907409667969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 359.0755920410156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 358.9478454589844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 358.5896911621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 358.80584716796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 359.2242431640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 358.9947204589844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 358.9385070800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 358.8780517578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 358.966552734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 359.0474548339844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.88946533203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 358.8989562988281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 359.1456298828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 358.88348388671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 359.0030517578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 359.0466003417969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 358.8261413574219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 358.9588317871094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 359.1412048339844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 358.94244384765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 359.11077880859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 358.814697265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 359.04803466796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 358.7327880859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 359.2406921386719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 358.9449462890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 358.88153076171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 358.8227844238281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 358.7021789550781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 358.9149475097656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 359.11883544921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 358.9928894042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 358.8414306640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 358.8251953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 359.225830078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 359.3451843261719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 358.8680419921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 358.79180908203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 358.9078674316406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 358.8276672363281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 358.9333190917969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 358.8997497558594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 358.8610534667969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 358.82952880859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 359.1104431152344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 358.9017028808594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 359.0355224609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 358.80755615234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 359.28692626953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 358.9979248046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 359.0024719238281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 358.8352966308594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 358.99041748046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 358.8966369628906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 359.1640319824219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 359.0325927734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 358.9564208984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 358.960693359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 359.04510498046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 358.78900146484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 359.1396484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 359.5189208984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 359.19561767578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 359.0716552734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 359.1784362792969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 358.9957580566406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 358.8496398925781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 358.780517578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 359.40533447265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 358.99871826171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 359.0181579589844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 358.8477783203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 359.33917236328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 359.0996398925781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 359.10345458984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.8767395019531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 358.8795471191406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 359.0380859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 358.974365234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 359.50079345703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 358.9415588378906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 359.0049743652344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.8599853515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 359.4752197265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.8031921386719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.9117736816406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 358.63037109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 358.8114318847656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 359.1841125488281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 359.3500061035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 359.009033203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 358.8282165527344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 359.0093078613281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 359.3650207519531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 359.1416931152344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 359.0257568359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 359.0971984863281
############ Running episode number: 581  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.90472412109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 358.9498291015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 358.9103698730469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 358.90252685546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 358.9840393066406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 358.7496032714844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 358.9141845703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 359.1383361816406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 358.89324951171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 359.01019287109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 359.0150451660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 359.26287841796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 359.00152587890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 359.1009216308594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 358.9626770019531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 359.0195007324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 359.05169677734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 359.0921630859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 358.9170837402344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 358.817626953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 358.91357421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 359.44879150390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 359.1144714355469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 359.13824462890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 359.2449645996094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 358.8575439453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 358.9737548828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 359.033447265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 359.3825988769531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 359.0447692871094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 359.8206481933594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 358.94317626953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 358.8103332519531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 359.1639709472656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 358.7733154296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 359.1116027832031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 358.8349609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 359.3545227050781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 358.79803466796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 358.84649658203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 358.8125305175781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 359.0637512207031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 359.2590026855469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 359.2127685546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 358.9160461425781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 358.9321594238281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 358.9997863769531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 359.1650085449219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 358.8257141113281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 358.8467712402344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 358.8725280761719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 359.1401672363281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 358.97662353515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 358.7657165527344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 358.9650573730469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 358.7660827636719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 358.80615234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 359.2165832519531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 359.57464599609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 358.9463806152344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 358.8993225097656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 358.9881591796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 358.79345703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 359.005615234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 359.2651672363281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 358.946044921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 359.4501037597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 359.131103515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 359.03411865234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 359.3346862792969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 358.8934631347656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 359.0270690917969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 358.9609069824219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 359.02569580078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 359.11383056640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 359.53106689453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 359.0478515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 359.4771423339844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 359.2115783691406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 359.0249328613281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 359.1876220703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 358.9043273925781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 358.9356689453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 358.95391845703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 358.8897705078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.9942626953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 358.94598388671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.95941162109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 359.0697937011719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 359.08331298828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 358.7123107910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 359.4631652832031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 358.8460693359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 358.858642578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 358.7789306640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 359.1178283691406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 359.2646179199219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 358.92437744140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 359.0850524902344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 358.6329650878906
############ Running episode number: 582  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 359.0198669433594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 359.0264587402344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 358.8365783691406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 359.0870666503906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 359.2196044921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 359.12286376953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 358.9999084472656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 358.9257507324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 358.8594055175781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 358.9575500488281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 359.10235595703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 358.8965148925781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 358.9966735839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.9208679199219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 358.8844299316406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 359.0011901855469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 359.0567626953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 358.8449401855469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 358.90728759765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 358.8581237792969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 358.8202819824219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 359.6044921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 358.5981140136719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 359.1241149902344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 359.00103759765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 358.8232421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 359.4347229003906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 358.9273986816406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 359.25537109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 358.90667724609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 358.8764953613281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 358.69659423828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 358.7523498535156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 359.0079040527344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 358.7986145019531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 358.9833068847656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 359.0172424316406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 359.0237121582031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 358.8155822753906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 358.99664306640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 358.786376953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 359.1510925292969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 359.4866027832031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 358.9391174316406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 358.646484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 358.9342956542969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 359.53009033203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 358.80059814453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 358.9096374511719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 359.0068054199219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 359.15203857421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 358.94329833984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 359.0677185058594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 358.93585205078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 358.9613342285156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 359.0693054199219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 359.05517578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 358.9513854980469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 358.8267822265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 358.9757995605469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 358.9967041015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 359.41680908203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 358.72869873046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 358.6466979980469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 358.91326904296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 359.0216369628906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 359.4062805175781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 359.0643310546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 358.7833251953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 358.8554382324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 359.4306335449219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 358.84527587890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 359.0000915527344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 359.2135314941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 359.22821044921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 358.9607238769531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 358.8308410644531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 358.8264465332031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.9584045410156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 359.4140319824219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 359.0950927734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 358.9745178222656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 359.0632019042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 358.9058532714844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 359.55181884765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.8081359863281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 358.7905578613281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.9490661621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 359.1390380859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 358.73345947265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 358.905029296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 358.98101806640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 359.269287109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 359.3630065917969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 358.9070129394531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 359.0421447753906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 358.9167175292969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 358.88427734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 359.0653991699219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 359.0007629394531
############ Running episode number: 583  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.91265869140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 358.88861083984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 358.9856262207031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 358.95379638671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 358.77532958984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 358.968994140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 359.3927917480469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 358.7549133300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 358.9902648925781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 358.97540283203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 359.0218811035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 359.2127380371094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 359.2359313964844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.7966613769531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 358.8231201171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 359.0408935546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 358.7173767089844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 358.916259765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 358.9063415527344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 359.43841552734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 359.0642395019531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 358.8962707519531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 358.69775390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 358.8854675292969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 359.4894714355469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 359.0934143066406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 358.8011169433594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 358.7991943359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 358.97308349609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 358.7854919433594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 358.7455749511719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 359.2807312011719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 358.96044921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 359.021240234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 358.8866882324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 358.87506103515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 358.8873596191406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 359.4090576171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 359.0519714355469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 358.9149169921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 358.8293151855469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 359.1877746582031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 358.9422607421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 359.0022888183594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 358.91448974609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 359.2681579589844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 358.9969482421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 359.0614929199219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 358.85247802734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 359.4729309082031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 359.0582275390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 358.9109191894531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 359.138671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 359.21820068359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 358.78955078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 358.7815856933594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 358.9600830078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 359.0741882324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 358.8166809082031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 358.8758544921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 358.92388916015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 358.6802062988281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 359.0246887207031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 359.33721923828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 359.01123046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 359.1780090332031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 358.7903137207031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 358.7675476074219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 359.427734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 358.8586120605469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 358.9725036621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 358.8905334472656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 358.9150085449219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 359.0455017089844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 358.8766174316406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 359.05224609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 358.7164001464844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 358.90087890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 359.084228515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 358.80426025390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 359.03265380859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 358.9443054199219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 359.01934814453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 359.3240051269531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 358.87615966796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.8937683105469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 359.0087585449219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.9898986816406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 359.44476318359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 359.0365905761719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 359.4608154296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 359.02337646484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 358.96148681640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 359.1299133300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 359.05511474609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 358.9609069824219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 358.8077392578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 359.28472900390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 358.99371337890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 358.83209228515625
############ Running episode number: 584  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.83514404296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 359.1231689453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 359.4400634765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 359.0340270996094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 359.41839599609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 359.2906188964844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 358.75714111328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 358.79669189453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 358.8725891113281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 359.1600036621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 358.9076232910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 358.8255310058594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 358.9164733886719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 359.13934326171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 359.001708984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 358.8587341308594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 359.0269775390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 358.8826904296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 358.857421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 358.8553161621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 359.1045227050781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 358.93206787109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 358.8373718261719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 359.0361022949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 358.7625427246094
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 2 19.0 914.701547126 (10.319026962956018, 11)
loss 358.89697265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 358.9814147949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 359.11053466796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 358.8908386230469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 358.86676025390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 358.97821044921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 358.86187744140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 358.9300537109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 359.8487854003906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 358.93548583984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 358.781982421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 358.9114685058594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.86138916015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 359.2434997558594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 358.8521423339844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 358.9677734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 358.66571044921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 359.2930603027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 358.7521667480469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 358.86724853515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 358.7672424316406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 358.9135437011719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 359.0044250488281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 358.99755859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 358.8946228027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 358.91473388671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 358.888671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 358.9448547363281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 359.1077880859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 359.00286865234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 358.7978820800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 358.7510681152344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 358.76544189453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 359.001708984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 359.033447265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 358.822998046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 358.54217529296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 359.3309020996094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 359.47760009765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 358.9983215332031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 359.45330810546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 359.0713195800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 358.8977966308594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 358.82122802734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 359.0121765136719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 359.0520935058594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 358.6733703613281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 359.2481994628906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 358.9206848144531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 359.0864562988281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 359.01678466796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 358.92999267578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 359.2248229980469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 359.089111328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 358.8772277832031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 358.9591369628906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 358.887451171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 359.0989074707031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 359.07366943359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 359.27032470703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.7462463378906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 359.0096740722656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.9679870605469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 359.3015441894531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 358.87103271484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 359.3691101074219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 358.7537536621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 359.017822265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 358.6705322265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 359.24664306640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 358.84490966796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 358.7719421386719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 359.40338134765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 359.0886535644531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 358.9966735839844
############ Running episode number: 585  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.93023681640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 359.0361633300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 358.8232421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 359.1974792480469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 358.8065185546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 359.00732421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 359.0281066894531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 358.84722900390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 358.82110595703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 358.7864685058594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 358.78802490234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 359.3841552734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 359.0038146972656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.921630859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 359.10125732421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 359.0252990722656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 358.83367919921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 358.8504333496094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 358.9918212890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 359.14959716796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 358.91094970703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 359.0079650878906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 358.99151611328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 359.15740966796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 358.8844909667969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 359.2260437011719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 358.6728820800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 358.964599609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 358.9070739746094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 358.8972473144531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 358.9215393066406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 358.9197692871094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 359.35198974609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 359.28582763671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 358.8002014160156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 358.8040466308594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 359.0401306152344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 359.13360595703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 359.2327575683594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 358.831787109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 358.9630432128906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 358.9031982421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 358.8977966308594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 358.9937744140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 359.0940246582031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 358.8585510253906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 358.8941955566406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 358.84375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 359.3353271484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 358.8782043457031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 358.8802185058594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 359.1533508300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 359.03466796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 359.42376708984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 358.7862243652344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 359.3623962402344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 359.070068359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 358.7927551269531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 359.3631286621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 359.0447692871094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 359.0445556640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 359.0100402832031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 359.0184020996094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 358.98870849609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 359.1147766113281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 358.90582275390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 358.854736328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 359.0865478515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 359.3882141113281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 359.2935791015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 358.9189147949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 359.0482177734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 358.67254638671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 358.9572448730469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 358.9888000488281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 359.1171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 359.1116027832031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 358.9643859863281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.8603820800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 358.6609802246094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 359.20050048828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 359.18218994140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 359.174560546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 358.92645263671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 358.6950378417969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.8160705566406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 358.9291076660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.9242858886719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.75653076171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 358.80523681640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 358.7265319824219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 359.08062744140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 358.8817443847656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 359.16558837890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 358.9415283203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 359.009521484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 359.6558837890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 358.81573486328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 358.9557189941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 358.84881591796875
############ Running episode number: 586  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.7428283691406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 359.16424560546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 358.8843994140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 358.691650390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 358.95458984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 359.0321960449219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 359.0634765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 359.3888244628906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 358.89056396484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 359.4253234863281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 359.0060119628906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 358.93133544921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 358.9668884277344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 359.3717041015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 359.21124267578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 359.0972595214844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 358.9654235839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 358.98760986328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 359.1307373046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 358.9778137207031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 358.6977844238281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 358.9544677734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 358.7675476074219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 358.97021484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 358.9264831542969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 359.07379150390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 359.0222473144531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 358.9715881347656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 358.95611572265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 358.97967529296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 358.9855651855469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 358.9560546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 358.9197082519531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 358.93878173828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 358.75714111328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 359.54766845703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 358.88983154296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.99615478515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 358.7919921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 358.9962158203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 359.07177734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 358.92559814453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 359.1052551269531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 359.0379333496094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 358.90081787109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 358.8179626464844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 358.93701171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 358.8901672363281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 358.9456481933594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 359.016845703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 358.9323425292969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 358.8161926269531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 358.9812316894531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 358.9083557128906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 358.8398742675781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 359.14666748046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 359.0169982910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 359.0831298828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 359.011962890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 359.4179382324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 358.73040771484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 358.94873046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 358.79638671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 359.4698486328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 359.3381042480469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 358.7958068847656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 358.81353759765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 359.0557861328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 359.013916015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 359.2792053222656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 359.4133605957031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 358.89910888671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 359.1740417480469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 358.7012939453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 359.09619140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 358.9562683105469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 358.6554260253906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 359.1737060546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.974365234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 358.9270935058594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 358.9120178222656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 359.1580505371094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 358.96832275390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 358.814453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 359.0079040527344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.9657897949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 358.72125244140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 359.32720947265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.9198913574219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 358.93096923828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 359.07928466796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 359.2865295410156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 358.8648681640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 359.0213317871094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 358.79583740234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 358.8916320800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 358.6114196777344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 359.5296936035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 358.80145263671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 359.4812927246094
############ Running episode number: 587  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.7701721191406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 358.93817138671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 358.9002380371094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 358.95111083984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 358.88446044921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 358.83538818359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 358.7680358886719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 358.8468933105469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 359.0644836425781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 358.9263610839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 358.985595703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 358.95928955078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 358.71405029296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.67803955078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 358.8458251953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 359.0697326660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 358.74462890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 358.7408142089844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 358.90802001953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 358.89508056640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 358.780029296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 358.7627258300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 358.9779052734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 358.86456298828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 358.9746398925781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 358.954345703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 358.9446716308594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 358.9356994628906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 358.8174743652344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 359.08905029296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 359.0520935058594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 359.0632019042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 359.0032043457031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 358.9963684082031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 358.6447448730469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 359.0115966796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 358.7406921386719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.8247375488281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 358.7361145019531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 358.84844970703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 359.36248779296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 358.9827880859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 359.0123596191406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 359.02288818359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 358.8617858886719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 358.945556640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 358.77783203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 358.9327697753906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 358.9242858886719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 358.9431457519531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 358.7781066894531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 358.9256591796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 359.3003234863281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 358.9727478027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 359.0894775390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 359.1676330566406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 358.9211730957031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 358.6862487792969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 359.3421630859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 358.9625549316406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 359.0723876953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 358.714111328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 359.0691223144531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 359.0075378417969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 359.22021484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 359.051025390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 359.05731201171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 358.7284851074219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 358.7625732421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 358.97705078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 359.2760009765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 358.8742370605469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 359.0631408691406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 359.07568359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 359.1228942871094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 359.01202392578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 359.0412292480469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 358.9288635253906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.7632141113281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 359.4825439453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 358.77587890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 358.805419921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 358.9405517578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 358.9627380371094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 358.98052978515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.6746520996094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 359.1659851074219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 359.04833984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 359.0055236816406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 358.9574890136719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 359.1728515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 358.7749328613281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 359.2642517089844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 358.8066101074219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 358.9706726074219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 359.0948181152344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 358.9315490722656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 358.98162841796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 359.19244384765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 358.7992248535156
############ Running episode number: 588  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.9241943359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 358.8720703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 358.7343444824219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 358.6756286621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 359.2851257324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 359.14678955078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 358.90020751953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 358.8928527832031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 359.04315185546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 358.8662414550781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 358.908203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 358.9201965332031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 358.793701171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 359.0216979980469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 359.0322570800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 359.12322998046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 358.8730163574219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 359.1329040527344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 358.9846496582031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 358.77008056640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 359.0434265136719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 359.154541015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 359.04510498046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 358.7748107910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 358.82012939453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 359.0887145996094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 358.82421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 358.8390197753906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 358.935791015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 359.14764404296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 358.85595703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 358.87872314453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 359.27154541015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 358.8635559082031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 358.86407470703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 358.9334716796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 358.84332275390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.97113037109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 358.8958740234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 358.9129333496094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 358.8390197753906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 358.97784423828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 358.8424987792969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 359.38525390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 359.1291809082031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 358.98663330078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 359.0517883300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 358.91363525390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 358.903564453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 358.53387451171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 359.4936218261719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 359.083251953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 359.1596374511719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 358.8600158691406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 358.68292236328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 359.1471862792969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 359.046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 359.1174621582031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 359.1327819824219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 358.8624572753906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 359.02130126953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 358.9983215332031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 358.7720642089844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 358.9794616699219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 359.035400390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 358.8757019042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 359.3755798339844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 359.3341369628906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 359.04345703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 359.0375671386719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 359.0488586425781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 358.81927490234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 359.0076904296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 358.781005859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 358.9631652832031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 358.8326110839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 358.87469482421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 359.1517333984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 359.0779113769531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 359.089599609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 358.9940185546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 358.9465026855469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 358.947265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 358.8111267089844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 358.7816467285156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.76861572265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 359.0010681152344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.9693603515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 359.115234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 358.8182067871094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 358.978515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 359.0403747558594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 358.9902648925781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 358.7779235839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 358.9224853515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 359.028564453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 358.8927001953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 359.1213073730469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 359.171142578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 358.8542785644531
############ Running episode number: 589  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.9703063964844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 359.0478515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 359.0003967285156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 358.91949462890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 359.0584411621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 359.0267639160156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 358.7707824707031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 358.8416748046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 358.99346923828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 359.1532287597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 358.9673156738281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 358.8872985839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 358.90045166015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 359.0428771972656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 359.0484619140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 358.8794250488281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 358.9195251464844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 358.8106384277344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 358.7823791503906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 358.9393310546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 359.096923828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 358.9872131347656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 358.9681701660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 358.9385681152344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 358.8251647949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 358.69525146484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 359.0711669921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 358.9571533203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 358.92645263671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 358.9030456542969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 359.00469970703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 359.0467834472656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 359.0389099121094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 359.13311767578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 358.89300537109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 359.1044616699219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 358.89166259765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 359.2413635253906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 359.20611572265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 359.0541687011719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 359.0340881347656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 358.6664733886719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 358.9580078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 358.8110046386719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 359.076904296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 358.82220458984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 359.0117492675781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 358.89013671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 358.9929504394531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 359.0885925292969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 358.79241943359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 358.9040832519531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 358.9367980957031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 359.0057678222656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 358.7716979980469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 359.0876770019531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 358.8295593261719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 358.7931213378906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 358.9128723144531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 359.1238708496094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 359.0115966796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 358.94598388671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 358.9481201171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 359.1422424316406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 359.0196533203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 358.896728515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 358.7322998046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 359.06793212890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 358.8536071777344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 359.0903015136719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 358.9408264160156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 358.6678161621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 359.08062744140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 358.71734619140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 359.0061340332031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 359.0416564941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 358.8255920410156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 358.7901916503906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 359.0736083984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 359.10107421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 359.0654296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 359.1182556152344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 358.90277099609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 359.04998779296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 358.7919921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.81005859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 359.0019226074219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 359.23797607421875
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 1 20.0 1211.18755114 (16.004586266677634, 10)
loss 359.1944274902344
Current State,action,reward,Response time,Next State:  (10, 16.004586266677634) 3 19.0 1226.60915635 (16.017694914042416, 11)
loss 359.0121765136719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 359.2163391113281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 358.9459228515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 359.1173400878906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 358.87548828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 358.9911193847656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 359.27056884765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 359.23504638671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 358.99798583984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 359.0988464355469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 358.7110900878906
############ Running episode number: 590  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 1 21.0 1029.06659875 (11.973514343585284, 9)
loss 358.8498229980469
Current State,action,reward,Response time,Next State:  (9, 11.973514343585284) 3 20.0 1049.87192659 (11.786394321941378, 10)
loss 358.9757080078125
Current State,action,reward,Response time,Next State:  (10, 11.786394321941378) 3 19.0 1002.85899476 (11.61852219546234, 11)
loss 358.96209716796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 359.6330261230469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 358.8767395019531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 359.25543212890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 359.6673583984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 359.2168884277344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 358.93206787109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 358.9923095703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 359.013427734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 358.9263610839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 359.33831787109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 359.29742431640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 359.8135681152344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 358.90521240234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 358.8653564453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 359.091064453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 358.9368591308594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 359.3931884765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 358.9172668457031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 358.9117431640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 358.7509460449219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 359.42999267578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 358.5774230957031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 358.76513671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 358.9899597167969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 358.8865661621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 358.9938049316406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 359.0067138671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 358.8797607421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 358.98193359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 358.9081726074219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 358.97430419921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 359.0351867675781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 358.9175720214844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 358.9998474121094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 359.062255859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 358.8526306152344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 359.01153564453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 358.8783264160156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 359.0770263671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 359.6875305175781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 358.9963073730469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 359.1480407714844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 358.87298583984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 359.25555419921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 358.80450439453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 359.1326599121094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 359.01708984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 358.8908996582031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 359.12530517578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 359.144287109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 359.3772888183594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 358.7491455078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 359.06915283203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 359.5094909667969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 359.0965881347656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 359.84405517578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 359.027587890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 359.7283020019531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 359.0416564941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 358.63043212890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 358.7754821777344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 358.97314453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 359.3355407714844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 358.9325866699219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 360.01708984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 358.8525695800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 359.09283447265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 359.3843688964844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 358.9460754394531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 359.108154296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 358.7261657714844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 358.8553771972656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 358.99615478515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 359.2561950683594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 358.6432189941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.84454345703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 360.3016662597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 358.8223876953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 359.4683532714844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 360.23095703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 359.8494567871094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 359.10028076171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 359.39862060546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 359.00994873046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 359.1353759765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 359.0972900390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 358.86260986328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 359.17816162109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 358.96136474609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 358.80523681640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 359.25726318359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 359.1803283691406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 359.25762939453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 359.0996398925781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 358.7655944824219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 359.1566162109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 359.2855529785156
############ Running episode number: 591  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 359.0941467285156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 359.85791015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 358.94927978515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 359.8907165527344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 359.1068115234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 359.1035461425781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 358.8976745605469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 360.16241455078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 359.08721923828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 358.8976135253906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 359.30731201171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 359.72845458984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 359.22113037109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.9245910644531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 359.0018005371094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 358.7206115722656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 359.0302429199219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 359.5107421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 358.9001159667969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 358.98638916015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 359.5692443847656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 358.9455871582031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 359.0053405761719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 359.03277587890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 358.9177551269531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 359.6537170410156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 358.6888732910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 358.9696044921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 359.1842956542969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 359.074951171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 358.96160888671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 358.86932373046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 358.997314453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 359.40826416015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 358.68255615234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 358.837158203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 359.7001037597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.98968505859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 358.9852294921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 359.2499694824219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 358.7776794433594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 360.2332763671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 359.4669494628906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 359.1227111816406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 358.9384765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 359.6009826660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 359.0546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 359.60418701171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 359.0625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 359.0021057128906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 358.8775939941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 359.10546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 359.0115051269531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 358.9715881347656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 358.9422912597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 360.12030029296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 359.0820617675781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 358.9617614746094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 359.3978576660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 358.80694580078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 359.0199279785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 359.253173828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 358.8038330078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 358.8614196777344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 358.7980651855469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 359.1675109863281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 358.8991394042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 358.9144592285156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 358.88677978515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 359.1605529785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 358.87127685546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 358.9757080078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 359.0925598144531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 358.8639221191406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 358.8553771972656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 359.06982421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 359.1177062988281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 358.91265869140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 359.0102233886719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 359.67071533203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 359.0541076660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 358.91632080078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 359.0121154785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 358.7509460449219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 358.8353576660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 359.7940979003906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 359.0487976074219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.947509765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.67559814453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 359.1541748046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 359.7026062011719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 358.8935241699219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 358.8930969238281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 359.030029296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 359.0128479003906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 358.98712158203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 359.0119934082031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 360.0613098144531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 358.8902587890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 358.9580383300781
############ Running episode number: 592  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.9021911621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 359.2164001464844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 359.08880615234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 359.4361267089844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 359.04034423828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 358.9195251464844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 359.8948974609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 358.8605041503906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 359.040283203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 359.0236511230469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 359.26806640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 358.9097595214844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 358.9012451171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.87823486328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 359.3739929199219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 360.11956787109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 359.21966552734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 358.8638610839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 359.1231384277344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 358.9260559082031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 358.8180847167969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 358.9285583496094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 358.8673095703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 359.06341552734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 359.1387634277344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 359.0823669433594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 358.88189697265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 358.9018859863281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 360.0494079589844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 358.51885986328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 359.1285705566406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 359.03839111328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 359.0013732910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 359.45556640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 358.9244079589844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 358.96673583984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 359.00506591796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 359.9275207519531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 360.0386657714844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 358.92803955078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 359.3427734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 358.8221435546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 358.7916259765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 358.9942321777344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 358.7245178222656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 358.8790283203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 359.3594970703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 358.9674377441406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 359.0689392089844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 358.9503173828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 358.85333251953125
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 1 20.0 1028.70793389 (13.168618569876575, 10)
loss 359.03997802734375
Current State,action,reward,Response time,Next State:  (10, 13.168618569876575) 3 19.0 1076.17782493 (13.649658108197247, 11)
loss 359.0699768066406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 359.1285400390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 358.7732238769531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 358.94952392578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 358.8446350097656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 359.7857666015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 358.97308349609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 359.46405029296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 359.05987548828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 359.060302734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 358.76568603515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 358.8157653808594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 358.9727783203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 358.9880676269531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 359.0714111328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 359.32586669921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 358.94622802734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 359.05694580078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 359.20867919921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 359.6794738769531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 358.9856262207031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 359.1270446777344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 358.955810546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 359.12615966796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 360.17791748046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 358.9826965332031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.98406982421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 359.2781677246094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 358.9787292480469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 359.8075256347656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 358.66937255859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 358.8373107910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 358.799560546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 359.26763916015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 359.0746154785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.8578186035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.9253845214844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 360.11328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 359.56134033203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 359.0206298828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 358.6650085449219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 359.2001037597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 359.6796569824219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 359.013671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 358.9714660644531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 359.88604736328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 359.9527587890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 358.8762512207031
############ Running episode number: 593  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 359.05303955078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 358.8265686035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 358.8382873535156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 358.9322814941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 358.9953918457031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 358.94866943359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 358.781005859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 358.9249267578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 359.0389099121094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 359.0379638671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 359.0283203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 358.71099853515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 359.3675842285156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.7211608886719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 359.0950012207031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 359.6405029296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 359.0394287109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 358.9523620605469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 358.7735290527344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 359.51947021484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 359.2299499511719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 358.960205078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 358.7661437988281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 359.0867919921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 359.0708312988281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 359.0133361816406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 358.8203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 358.6919250488281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 359.3182373046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 359.2715759277344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 359.2939147949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 358.9552917480469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 359.0261535644531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 358.8902282714844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 358.8258361816406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 359.48968505859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 358.92242431640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 359.2572326660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 358.94793701171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 359.0509948730469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 359.3494873046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 358.7921447753906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 358.62322998046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 359.2659912109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 359.1114501953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 359.1666564941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 359.8577575683594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 358.9189758300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 359.3455505371094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 359.32489013671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 358.9259948730469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 358.96795654296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 359.17254638671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 359.130859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 358.8316650390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 358.9128723144531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 359.0954284667969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 358.9661865234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 359.2276306152344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 358.9532165527344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 358.9471740722656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 359.128173828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 359.0277099609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 359.30517578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 359.183837890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 359.4046630859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 359.7383117675781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 358.7192687988281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 358.68548583984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 358.9703369140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 358.8775939941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 359.3313293457031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 358.7709045410156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 359.28948974609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 359.224365234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 359.0624084472656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 359.1006164550781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 358.87469482421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.9334716796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 358.8440246582031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 359.4494323730469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 359.0648193359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 359.6578063964844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 359.70123291015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 359.0172424316406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 359.17047119140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 358.8643798828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.9715576171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.81329345703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 359.0145263671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 359.24420166015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 359.9004821777344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 358.859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 360.0975646972656
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 1 20.0 1225.69057988 (16.295120821876548, 10)
loss 359.30072021484375
Current State,action,reward,Response time,Next State:  (10, 16.295120821876548) 3 19.0 1242.02029803 (16.667936385136993, 11)
loss 359.1462707519531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 358.98260498046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 359.4483642578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 358.9986572265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 360.228515625
############ Running episode number: 594  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.89208984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 359.05419921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 359.0852355957031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 359.5967712402344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 358.95550537109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 358.9851989746094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 359.85955810546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 358.9202575683594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 358.9042663574219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 358.94677734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 359.43145751953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 358.956787109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 359.0577697753906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 359.8213195800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 359.14141845703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 359.15380859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 359.3064270019531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 358.9896545410156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 359.4172668457031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 358.814453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 358.9879455566406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 360.0233154296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 358.8138122558594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 359.4571228027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 359.2936096191406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 358.7771301269531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 358.8638000488281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 359.3880615234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 358.8904724121094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 359.8045959472656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 359.9670715332031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 358.9537353515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 358.9907531738281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 359.0788879394531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 359.2059326171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 359.38116455078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 358.7639465332031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.844970703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 358.8962707519531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 358.642333984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 359.2797546386719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 359.04608154296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 360.143310546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 359.45672607421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 359.25701904296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 358.9388122558594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 359.7445983886719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 358.9595947265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 358.7873229980469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 359.42572021484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 359.7226867675781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 359.1407470703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 359.7893981933594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 359.4902038574219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 359.1158447265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 359.5299072265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 359.024658203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 359.6391296386719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 359.1731872558594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 359.3503723144531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 358.99627685546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 359.29156494140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 358.6830749511719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 359.43798828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 359.2428894042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 360.22515869140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 358.9847717285156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 358.8359069824219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 358.9875793457031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 359.1871337890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 359.73101806640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 358.9070739746094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 359.00555419921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 359.6431579589844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 359.28704833984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 359.0149230957031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 359.0831604003906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 359.6112060546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 359.3147277832031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 358.987548828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 358.980712890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 359.0029602050781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 358.9635314941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 359.7825927734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 359.08807373046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.648681640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 358.92510986328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 359.1376647949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.7989196777344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 358.9751281738281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 358.81298828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 358.7237854003906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 359.5994873046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 358.9296569824219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 358.845947265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 359.0433654785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 359.4486999511719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 359.10845947265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 358.8235168457031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 358.8729248046875
############ Running episode number: 595  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 359.5156555175781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 360.27398681640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 358.8100891113281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 359.6224670410156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 359.131591796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 358.77716064453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 358.9457092285156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 359.4451904296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 359.0569763183594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 358.9606628417969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 358.8380432128906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 359.4046630859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 358.74700927734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 359.0430603027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 359.65203857421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 359.6390686035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 358.83184814453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 359.11700439453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 359.17095947265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 358.9038391113281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 359.02532958984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 359.3155822753906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 358.92279052734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 358.9800720214844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 358.9975280761719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 359.01092529296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 358.91339111328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 358.89727783203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 358.9158020019531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 359.0065002441406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 358.959228515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 359.0239562988281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 358.9625549316406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 358.8368835449219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 358.8656921386719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 359.000244140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 359.6762390136719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.8961181640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 359.0138854980469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 359.2428283691406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 358.93170166015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 359.3955993652344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 358.7210998535156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 359.3404541015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 359.5002136230469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 358.8659973144531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 359.9613342285156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 358.8507385253906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 358.9664306640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 358.7735900878906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 359.06243896484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 359.12493896484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 358.9920349121094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 359.4191589355469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 360.2042236328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 359.12939453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 359.2483215332031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 358.9759216308594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 359.5520935058594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 359.5376892089844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 358.6654052734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 359.28509521484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 359.3288269042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 359.81591796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 358.86138916015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 359.0892333984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 360.3359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 359.210205078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 358.86260986328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 358.9739990234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 358.8972473144531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 359.04742431640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 359.26751708984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 358.89166259765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 358.8668518066406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 359.0334167480469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 358.8435974121094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 359.3509826660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.81024169921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 359.2042236328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 358.955078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 358.7978515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 359.1228332519531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 359.568359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 358.9630126953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.9264831542969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 358.898681640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.7289123535156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 359.72637939453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 358.9150390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 358.9526672363281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 359.50323486328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 359.1329650878906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 358.7932434082031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 359.01953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 358.91400146484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 359.7941589355469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 359.0940856933594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 359.7664489746094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 359.3790283203125
############ Running episode number: 596  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.9591369628906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 359.256103515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 359.29571533203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 358.93121337890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 359.0213623046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 359.06317138671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 358.99560546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 358.97918701171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 359.81365966796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 359.3290710449219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 358.8847961425781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 358.91802978515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 358.973876953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 360.4377746582031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 359.3026428222656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 358.9180908203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 359.3433837890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 359.0237731933594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 359.16864013671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 358.9874572753906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 359.0332946777344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 359.3611145019531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 359.2242431640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 359.261962890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 358.77178955078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 359.3223876953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 359.5370178222656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 359.0342102050781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 359.3948974609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 359.6748352050781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 358.8367004394531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 359.4352722167969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 359.2889099121094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 359.0686950683594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 358.8858947753906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 358.916748046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 358.796142578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 359.1735534667969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 359.313232421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 358.9778137207031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 359.34796142578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 359.0461730957031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 359.1611022949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 358.81402587890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 359.0960693359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 358.89312744140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 358.84710693359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 358.92535400390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 359.04986572265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 359.37750244140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 358.9153747558594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 359.31182861328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 359.8643493652344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 359.3429260253906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 358.99884033203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 358.991455078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 358.9989013671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 359.0920104980469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 358.78741455078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 358.9349060058594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 359.7041320800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 359.652587890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 359.27569580078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 360.14459228515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 358.8025817871094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 359.0677185058594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 358.8882751464844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 358.7890930175781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 359.8607177734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 359.271728515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 358.9040222167969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 358.6539611816406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 359.8916320800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 359.22998046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 359.0625305175781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 359.1156005859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 359.0716552734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 358.8708801269531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.998779296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 360.3271789550781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 358.98809814453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 359.2728576660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 358.7853698730469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 358.6199645996094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 360.082275390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 359.12420654296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 359.55615234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 359.63873291015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.7243347167969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 360.09722900390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 359.89678955078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 359.51251220703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 359.24554443359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 358.913330078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 359.005859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 358.8291931152344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 359.0718688964844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 358.8387145996094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 359.3088073730469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 359.0408630371094
############ Running episode number: 597  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.9216003417969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 359.29010009765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 359.1147766113281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 358.84326171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 359.00311279296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 358.7062683105469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 358.98480224609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 358.91796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 358.894775390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 359.97125244140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 359.6700439453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 359.2425537109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 358.9320068359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 359.2803039550781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 359.0011291503906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 359.19281005859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 358.6856689453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 359.3660888671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 359.0319519042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 359.3351745605469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 359.4559020996094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 358.9480285644531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 359.59832763671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 359.2664489746094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 359.4577941894531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 358.9302673339844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 359.1133117675781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 359.0083923339844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 359.3000183105469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 359.041015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 359.0115966796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 360.05010986328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 358.8634338378906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 359.1481628417969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 359.45281982421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 358.7457275390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 359.1259765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.9696350097656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 359.6978759765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 358.8856201171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 359.5956115722656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 359.305908203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 359.03692626953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 358.74273681640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 358.9774169921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 359.3520202636719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 359.24169921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 359.2204284667969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 358.8568420410156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 359.2725830078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 358.9833679199219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 358.99688720703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 359.38531494140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 359.7069091796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 358.8067321777344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 359.0769348144531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 358.8530578613281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 359.05340576171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 359.36846923828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 358.672607421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 359.80523681640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 358.7037658691406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 359.3746337890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 359.4323425292969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 358.84063720703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 359.1860046386719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 358.932373046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 359.20648193359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 359.41265869140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 359.4265441894531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 358.98309326171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 359.0192565917969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 359.0547180175781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 360.1361999511719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 359.02618408203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 358.9295959472656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 359.6676025390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 358.7628479003906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.5541687011719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 359.0357666015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 359.88372802734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 358.97509765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 359.2480773925781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 358.84881591796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 358.9512634277344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.5874328613281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 358.87359619140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 359.4766540527344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 359.4273681640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 360.2287902832031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 358.9202880859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 358.8438415527344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 359.39453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 359.9039611816406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 359.2771301269531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 359.5029296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 359.125244140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 359.4504699707031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 359.1962890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 359.43072509765625
############ Running episode number: 598  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 359.17559814453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 359.3857727050781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 359.0076599121094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 359.35015869140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 359.1075439453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 359.0635681152344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 358.92779541015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 358.9162292480469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 358.94158935546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 359.9005126953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 359.31109619140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 358.9173583984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 359.052978515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 359.3164367675781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 359.09820556640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 358.9507751464844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 359.0369873046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 359.2120056152344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 359.6700134277344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 358.9521179199219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 359.1059265136719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 359.4017028808594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 358.9693298339844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 359.54425048828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 359.0227966308594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 359.2113037109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 358.8653564453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 359.73944091796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 359.020263671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 358.9578857421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 358.80462646484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 358.8661804199219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 359.30633544921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 358.9324035644531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 359.0420227050781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 358.9000549316406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 359.2392883300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.6865234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 359.0235595703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 359.00054931640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 358.96453857421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 358.6087341308594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 358.8316650390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 358.7530212402344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 358.7417907714844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 359.1320495605469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 359.3794250488281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 358.9171142578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 359.121337890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 359.3089904785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 359.0113220214844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 359.2411193847656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 358.8271179199219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 359.052734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 359.341064453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 360.1274719238281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 359.125732421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 359.248779296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 359.4279479980469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 359.0313720703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 359.8775329589844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 358.81414794921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 358.9200134277344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 359.0357360839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 359.4566955566406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 358.91009521484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 358.99395751953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 358.8718566894531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 359.02886962890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 359.5351867675781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 358.8515930175781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 359.37554931640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 358.8732604980469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 359.2166442871094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 359.4362487792969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 358.9369812011719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 359.1174011230469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 359.2051696777344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.8431396484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 359.7723693847656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 359.4817810058594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 359.1063537597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 359.088134765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 359.0462341308594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 358.59808349609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.9259338378906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 358.6221008300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.90863037109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 359.3132019042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 358.9483642578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 359.0223388671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 359.0426940917969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 359.094970703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 359.2232666015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 359.0472717285156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 359.3590087890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 358.885009765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 359.14361572265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 359.3923645019531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 358.9715881347656
############ Running episode number: 599  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.9657897949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 359.49609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 358.92431640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 359.1927185058594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 358.8844909667969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 358.9793395996094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 359.0885009765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 359.47576904296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 359.0155029296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 359.08349609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 358.947509765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 359.04864501953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 358.8396301269531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 359.10699462890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 360.4129638671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 359.555419921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 359.8360900878906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 359.3701477050781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 359.0351257324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 358.9740905761719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 359.5685729980469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 359.65869140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 359.1404113769531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 358.9581604003906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 359.7676696777344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 358.9049072265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 359.1514892578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 359.35235595703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 359.3587646484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 359.62127685546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 359.0990295410156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 358.7654724121094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 358.9773864746094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 359.01422119140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 359.2505187988281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 359.2669982910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 358.98455810546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 359.1723937988281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 358.8238830566406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 359.0176086425781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 359.1477355957031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 358.7749328613281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 358.9518737792969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 358.9199523925781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 360.4220275878906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 359.0396728515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 360.13909912109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 358.68804931640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 359.3199462890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 358.9085998535156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 358.8340759277344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 358.888427734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 358.94390869140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 358.9570617675781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 359.0270080566406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 358.9001159667969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 358.9665832519531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 359.4990234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 359.6546630859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 358.9374694824219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 359.3377685546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 359.6831970214844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 360.0635986328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 359.0604553222656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 360.13336181640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 358.9614562988281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 358.8008728027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 359.2041015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 359.2610168457031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 359.2679443359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 359.04840087890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 359.07232666015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 359.14654541015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 358.946044921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 359.0396423339844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 358.6462097167969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 359.0582580566406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 358.98968505859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.8857727050781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 358.89190673828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 358.6706237792969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 359.15924072265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 359.3606872558594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 358.6827697753906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 359.421142578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 359.2785949707031
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 0 21.0 1204.59090422 (15.892373986997768, 9)
loss 360.98675537109375
Current State,action,reward,Response time,Next State:  (9, 15.892373986997768) 3 20.0 1255.00488935 (15.954793861767499, 10)
loss 359.29693603515625
Current State,action,reward,Response time,Next State:  (10, 15.954793861767499) 3 19.0 1223.96796344 (16.004586266677634, 11)
loss 359.35614013671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 358.8842468261719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 359.5797424316406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 358.7308349609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 359.0070495605469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 359.4847717285156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 359.28887939453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 359.0066833496094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 359.54168701171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 359.0042724609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 358.86090087890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 359.80889892578125
############ Running episode number: 600  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.8205871582031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 359.0565185546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 359.2159423828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 359.2640380859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 359.04608154296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 360.6376647949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 359.2738037109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 359.06158447265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 359.16070556640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 359.17431640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 358.97979736328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 358.6355285644531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 359.037353515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 359.01336669921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 359.1258850097656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 358.8907165527344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 359.1805114746094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 358.69805908203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 359.3583984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 360.09844970703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 359.0618896484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 359.63330078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 358.882080078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 359.6911315917969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 359.4438171386719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 359.3843078613281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 358.93402099609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 359.9426574707031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 358.8482360839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 358.6752624511719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 359.1529846191406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 358.9081726074219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 358.9859313964844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 358.9747314453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 359.35845947265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 359.7779235839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 359.0721435546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 359.3097839355469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 358.925048828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 359.2767028808594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 358.68853759765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 358.8421936035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 358.85693359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 359.04510498046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 359.445068359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 358.89727783203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 358.998291015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 358.9142150878906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 358.8693542480469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 359.7082214355469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 360.13812255859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 359.078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 359.9945068359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 359.3003234863281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 359.00421142578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 359.62359619140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 359.369384765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 359.1732482910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 359.4202575683594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 359.38140869140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 359.047607421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 359.0188293457031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 359.129638671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 358.8992004394531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 359.43536376953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 359.9158935546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 359.0442199707031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 358.8634948730469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 359.01239013671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 358.9190979003906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 359.5022277832031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 358.7997741699219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 359.4620361328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 359.85943603515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 358.93304443359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 359.3340148925781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 358.9622802734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 359.0240783691406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.8935241699219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 359.1124572753906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 359.0277404785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 358.6450500488281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 359.0516357421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 358.9082336425781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 358.83428955078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 359.08221435546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 358.8224792480469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 359.4234313964844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 359.0321350097656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 359.1453857421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 358.99676513671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 358.97607421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 359.4109802246094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 359.4093933105469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 359.4112548828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 358.9366455078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 359.2077331542969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 359.1435241699219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 359.3096008300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 358.82049560546875
############ Running episode number: 601  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 359.5223693847656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 359.1108703613281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 359.34466552734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 358.76983642578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 358.90362548828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 358.9278869628906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 359.1272888183594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 359.0689392089844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 359.03533935546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 359.0845642089844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 358.9876403808594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 359.0179138183594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 359.4148864746094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 359.3601379394531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 359.2431640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 359.4305114746094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 359.4713134765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 359.8080749511719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 358.9705810546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 358.9131774902344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 359.09869384765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 358.8603515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 359.1034851074219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 358.93475341796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 358.72613525390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 358.7977600097656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 359.26153564453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 359.15966796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 359.1443786621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 358.82244873046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 359.1773376464844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 358.9572448730469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 358.84912109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 358.87420654296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 358.95648193359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 359.357421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 359.91473388671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 359.0174255371094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 358.7957458496094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 358.8309020996094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 359.10321044921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 360.2037658691406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 358.944580078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 359.0414733886719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 358.9404296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 359.738037109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 358.8564453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 358.90228271484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 358.966552734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 358.97998046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 359.18353271484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 359.67681884765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 359.4187927246094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 359.25738525390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 359.0055236816406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 359.112060546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 358.8160705566406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 359.4114074707031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 359.041748046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 358.9339599609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 358.8805236816406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 358.8279113769531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 359.666259765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 359.01226806640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 359.56939697265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 359.3788146972656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 359.8455810546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 359.254150390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 359.07696533203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 358.8904113769531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 359.18994140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 359.0138854980469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 359.7803955078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 358.914306640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 359.0498962402344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 359.6340637207031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 359.4669189453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 359.0046691894531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 359.0588073730469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 358.8463134765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 358.8366394042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 358.790283203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 358.94207763671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 359.7883605957031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 359.35626220703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 359.37213134765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 360.1131591796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.76904296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.7685241699219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 359.056396484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 359.56427001953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 358.7806091308594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 358.8809814453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 358.9130859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 358.94921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 359.90948486328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 359.3990783691406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 359.40875244140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 359.64215087890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 359.28326416015625
############ Running episode number: 602  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.4523620605469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 358.9598693847656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 358.752685546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 359.732666015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 359.2807922363281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 359.4408874511719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 358.9128723144531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 358.90936279296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 358.95904541015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 358.830810546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 359.07135009765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 358.9282531738281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 358.833740234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.9449462890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 358.79974365234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 358.9193115234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 359.0284423828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 359.1969299316406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 358.937744140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 358.9990234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 359.3822937011719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 359.04534912109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 358.71832275390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 358.9422607421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 358.9150085449219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 359.7715148925781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 358.89825439453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 358.8876647949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 358.9855041503906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 358.86260986328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 359.4356384277344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 358.85064697265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 359.2862854003906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 359.9469909667969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 359.0116271972656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 358.874267578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 359.0799255371094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 359.15716552734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 358.9635009765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 358.7899169921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 359.1184997558594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 358.8264465332031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 358.9208068847656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 358.9059753417969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 359.0488586425781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 358.94854736328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 358.9373779296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 359.0564880371094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 359.2799377441406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 359.06719970703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 358.9413757324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 358.9414978027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 359.84832763671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 358.9614562988281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 358.9258728027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 358.8722839355469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 359.81988525390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 358.79852294921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 359.9284973144531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 358.9264831542969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 359.7017822265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 359.7386474609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 359.25482177734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 359.00909423828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 358.9469299316406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 359.12921142578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 358.71527099609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 358.96636962890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 359.0569152832031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 358.980224609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 358.8497009277344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 359.13226318359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 359.1025695800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 359.1691589355469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 359.88690185546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 358.97076416015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 359.18267822265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 359.1015319824219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 359.0415344238281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 358.90435791015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 359.0670471191406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 359.8673095703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 359.0775451660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 358.8714904785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 359.16314697265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.9591979980469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 359.08935546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 359.9344787597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 359.1602783203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 358.8717956542969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 358.56109619140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 359.0584411621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 359.2041320800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 358.8684387207031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 358.9272155761719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 359.02777099609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 359.38592529296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 359.7923583984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 359.1315002441406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 358.843994140625
############ Running episode number: 603  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 359.1929016113281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 358.668212890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 358.8671569824219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 359.0016784667969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 359.8590087890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 358.8882141113281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 358.99481201171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 358.8990783691406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 359.1105651855469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 358.8793029785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 359.2270202636719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 359.88665771484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 359.2820129394531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 359.87225341796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 359.4138488769531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 358.8979187011719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 358.7235107421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 359.5865783691406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 358.90594482421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 358.9530334472656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 358.98992919921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 358.7252502441406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 358.9621276855469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 359.066162109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 359.9826965332031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 358.8882141113281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 358.9651794433594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 359.3156433105469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 359.239501953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 358.9613952636719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 359.0072021484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 358.88067626953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 358.93524169921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 359.1165771484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 358.92431640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 359.23345947265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 358.9860534667969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 359.0082092285156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 359.01708984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 358.90496826171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 359.3946838378906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 358.99749755859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 360.0938720703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 359.21954345703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 358.8025207519531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 359.2679138183594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 359.67913818359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 358.96661376953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 358.83795166015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 359.12005615234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 359.01556396484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 358.8254089355469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 358.8975524902344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 358.9688415527344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 358.9429626464844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 359.5556945800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 358.66949462890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 359.0300598144531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 358.9346618652344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 358.9245910644531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 358.80084228515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 358.9912414550781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 358.7387390136719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 360.2889099121094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 359.09619140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 358.8048400878906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 359.6799011230469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 358.6939392089844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 358.981689453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 359.28448486328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 359.92529296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 358.7916564941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 359.0997619628906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 359.093017578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 358.9542541503906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 358.8197326660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 359.0119323730469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 358.6795959472656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.6966247558594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 358.9276123046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 358.985107421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 358.9912414550781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 358.8958740234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 359.1506042480469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 359.85546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 359.24737548828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 358.7394104003906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 359.4469299316406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.8177490234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 358.9452819824219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 359.93487548828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 359.126708984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 358.9909973144531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 358.8023681640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 358.7040710449219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 358.614013671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 358.95098876953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 358.7778625488281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 359.0789489746094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 358.9864807128906
############ Running episode number: 604  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.76214599609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 359.3485412597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 359.8750305175781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 359.07623291015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 359.0075378417969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 359.1690673828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 358.7635192871094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 358.9379577636719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 359.0562744140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 358.810546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 359.8177795410156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 358.94171142578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 359.0153503417969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 359.7176208496094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 358.96881103515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 358.97491455078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 358.97808837890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 359.0400695800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 358.8426513671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 359.088134765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 358.96600341796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 359.0816345214844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 359.0684814453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 359.3598327636719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 358.90252685546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 359.0028076171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 358.9537658691406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 358.9349060058594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 359.2400817871094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 359.4731750488281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 358.8889465332031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 358.83343505859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 359.0531005859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 358.8736267089844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 360.04534912109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 359.22137451171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 358.9178771972656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.82470703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 358.8752746582031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 359.0726318359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 358.9632568359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 359.0141906738281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 359.0351257324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 358.9830627441406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 358.9012451171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 359.1109619140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 358.9399719238281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 358.9050598144531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 358.7564697265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 359.8211364746094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 358.8558654785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 359.0051574707031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 358.9879455566406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 359.07489013671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 358.85821533203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 359.140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 358.86199951171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 358.9307556152344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 359.0574035644531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 358.87530517578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 358.8029479980469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 359.6838073730469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 359.0030212402344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 358.5609130859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 359.1176452636719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 359.0181579589844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 358.82220458984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 358.8150634765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 358.9073791503906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 358.947021484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 358.7862854003906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 358.8793029785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 358.6794128417969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 359.0403137207031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 359.2857666015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 359.0850524902344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 358.79754638671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 358.9024658203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.8825378417969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 359.5809326171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 359.3855895996094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 359.3921203613281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 359.6956787109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 359.9501037597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 359.3096923828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 359.0045166015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 358.9313659667969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 359.9102783203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.9497375488281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 359.0709228515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 358.8739318847656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 358.7273254394531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 358.76312255859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 359.17889404296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 359.1813659667969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 359.16546630859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 358.91650390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 358.5577392578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 358.9645080566406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 359.64471435546875
############ Running episode number: 605  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.85247802734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 358.7682800292969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 358.8851623535156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 358.9761962890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 358.8943176269531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 358.85101318359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 358.6638488769531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 360.23834228515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 359.2154235839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 358.8369140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 358.6923828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 359.6158752441406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 359.0970458984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 359.82489013671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 359.0452880859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 359.672607421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 358.9696960449219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 358.89959716796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 358.98046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 358.7859802246094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 359.3920593261719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 359.7179870605469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 359.06072998046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 358.7313232421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 359.3443908691406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 358.99005126953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 358.973388671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 359.0066223144531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 358.8516845703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 358.9898376464844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 360.1632080078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 359.1368103027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 358.96148681640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 358.8853454589844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 358.9552307128906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 359.7408447265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 359.3837585449219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 359.1109619140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 359.093994140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 359.0610656738281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 359.1873474121094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 359.0248107910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 358.85662841796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 359.0627746582031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 358.96160888671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 359.0437316894531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 359.743408203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 359.57745361328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 358.80438232421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 359.177734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 358.9192199707031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 359.7902526855469
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 0 21.0 1063.96010023 (13.649658108197247, 9)
loss 358.9366760253906
Current State,action,reward,Response time,Next State:  (9, 13.649658108197247) 3 20.0 1137.6097809 (14.283719188889453, 10)
loss 359.0060729980469
Current State,action,reward,Response time,Next State:  (10, 14.283719188889453) 3 19.0 1135.32732476 (14.677479537099185, 11)
loss 358.8638000488281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 358.92218017578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 359.3330383300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 358.7133483886719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 358.8387145996094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 358.9040222167969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 358.65234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 359.1125183105469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 358.8995361328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 358.9620361328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 359.3306579589844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 359.14129638671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 359.0690002441406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 358.85955810546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 359.03253173828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 359.1226806640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 360.1974792480469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 358.9580078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 359.093505859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 358.951904296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 359.1016845703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 359.8044738769531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 359.2926025390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 358.79443359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 359.4144287109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 359.3873596191406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 359.24072265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 358.80059814453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 359.0208740234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 358.9910888671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 358.9732666015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.9372863769531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 358.8899230957031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 359.96002197265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 360.6205139160156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 359.96600341796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 359.7857666015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 359.00634765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 359.0747985839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 359.02484130859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 358.8755798339844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 358.9991760253906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 358.980712890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 359.76202392578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 358.84197998046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 359.2336120605469
############ Running episode number: 606  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.9564514160156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 359.79266357421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 359.2027282714844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 359.12603759765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 358.9844665527344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 358.878173828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 359.0178527832031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 358.8761291503906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 358.7564697265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 359.0464782714844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 359.1911315917969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 359.9665832519531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 358.86322021484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 359.0030517578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 359.081298828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 358.9945373535156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 358.9824523925781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 358.8820495605469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 358.81121826171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 359.9106140136719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 359.1237487792969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 359.39801025390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 360.175048828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 358.6379699707031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 359.9327697753906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 358.98980712890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 358.9696960449219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 358.8934020996094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 359.6595764160156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 358.67205810546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 359.2651672363281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 359.705810546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 358.90203857421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 360.0191345214844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 359.00299072265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 358.74310302734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 358.9151611328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 359.8252868652344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 358.98095703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 358.8996887207031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 358.993896484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 359.2272644042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 358.90081787109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 358.900146484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 358.89678955078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 358.9018249511719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 358.7890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 358.7082824707031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 359.1572570800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 358.96856689453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 359.01495361328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 359.01385498046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 358.9573669433594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 358.7153015136719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 359.8291320800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 360.2067565917969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 358.814453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 359.087158203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 359.17767333984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 359.06011962890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 359.4665832519531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 359.1991882324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 358.9376220703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 359.1098937988281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 358.576904296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 359.26422119140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 359.5579528808594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 358.8202819824219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 358.93896484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 358.8371276855469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 358.7799072265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 358.89068603515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 359.90179443359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 358.7671813964844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 359.00677490234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 358.7964172363281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 359.8374328613281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 359.1022644042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.8823547363281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 359.49951171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 358.9508361816406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 358.94146728515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 358.8532409667969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 358.8816833496094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 359.4816589355469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.7945861816406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 359.2138671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 359.2310485839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 359.14306640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 359.913818359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 358.7201232910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 359.0399475097656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 359.0147705078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 358.8155517578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 358.9306640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 359.03997802734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 358.9813537597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 359.332763671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 359.0802001953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 358.9697570800781
############ Running episode number: 607  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.8326721191406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 359.23541259765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 358.9386901855469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 359.1225280761719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 359.8865051269531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 359.1077575683594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 358.85308837890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 359.0047302246094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 358.808837890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 359.62420654296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 359.7249450683594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 359.8552551269531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 359.6918640136719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 359.1028747558594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 358.8934020996094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 359.1951904296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 359.80126953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 359.839599609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 358.98785400390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 358.7186584472656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 358.8623046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 358.6005859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 358.8544921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 359.48095703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 359.9356689453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 359.0182189941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 359.6484069824219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 359.3113708496094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 360.0361633300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 358.8306884765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 359.0760192871094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 359.6161193847656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 358.9446716308594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 358.7835388183594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 358.8251953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 358.79437255859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 359.4821472167969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.8883056640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 358.96685791015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 359.4081115722656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 358.8538513183594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 358.8459167480469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 359.78375244140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 359.0572814941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 358.940185546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 359.1029357910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 359.0685119628906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 358.8492431640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 359.08343505859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 358.9591979980469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 358.8423767089844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 359.7537536621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 359.2591857910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 359.6708984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 359.9421691894531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 358.9414367675781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 358.908935546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 359.0622253417969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 359.03240966796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 358.9654235839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 359.02362060546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 359.0008850097656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 358.8381042480469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 358.9515686035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 359.8252868652344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 359.8387756347656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 359.7319641113281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 359.20654296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 358.769287109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 359.1510009765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 359.78961181640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 358.9678955078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 358.9582824707031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 358.96826171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 358.8516845703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 358.8269348144531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 358.7091369628906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 358.9382019042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.92620849609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 358.95733642578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 359.27166748046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 359.0187683105469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 358.953369140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 359.052734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 358.9977111816406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 359.845947265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 358.9700012207031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 359.35064697265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 359.0305480957031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 360.21319580078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 358.9608154296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 359.3146057128906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 360.0473937988281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 359.3222961425781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 359.85223388671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 358.9678955078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 358.8644104003906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 359.5423583984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 359.0663757324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 358.90545654296875
############ Running episode number: 608  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.7755432128906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 358.79486083984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 359.17706298828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 358.9746398925781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 358.83990478515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 358.9893798828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 359.08819580078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 359.0001525878906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 359.02459716796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 359.98974609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 358.9338684082031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 360.6144104003906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 359.0324401855469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.7001953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 358.9918212890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 358.95941162109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 359.25640869140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 358.83367919921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 359.2611389160156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 360.57598876953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 359.3320617675781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 359.18646240234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 359.89093017578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 359.46112060546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 359.03277587890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 359.2209777832031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 358.8992614746094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 359.01995849609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 358.7912292480469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 359.86077880859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 358.95452880859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 359.5333251953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 358.9680480957031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 358.9128112792969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 359.8587341308594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 359.2909240722656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 358.9989013671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.9580383300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 359.8066711425781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 358.9142761230469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 359.0218505859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 359.1147766113281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 358.8179016113281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 359.84039306640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 358.94989013671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 359.24676513671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 358.8191223144531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 358.9003601074219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 359.171630859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 358.95660400390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 360.1845703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 359.1116638183594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 359.7498474121094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 358.91998291015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 359.032470703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 359.0537109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 358.881103515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 358.8110656738281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 358.9252014160156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 359.074951171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 358.9213562011719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 358.9195251464844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 359.2897033691406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 359.327880859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 358.85040283203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 358.9582824707031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 359.25018310546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 359.1077575683594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 359.0093994140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 358.8932800292969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 358.84722900390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 359.0283508300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 358.8704833984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 359.2958068847656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 359.002685546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 359.1983947753906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 360.235107421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 358.874755859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 359.0015563964844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 359.0037536621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 359.01556396484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 358.6942138671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 359.0408020019531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 359.4918518066406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 358.92724609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 359.24505615234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 359.05596923828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 359.4686279296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.7699890136719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 359.09490966796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 358.79364013671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 358.86260986328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 358.7939758300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 358.77947998046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 358.6780700683594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 358.9617004394531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 359.4404602050781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 359.00299072265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 358.9570617675781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 359.7160339355469
############ Running episode number: 609  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.8566589355469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 359.9112548828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 359.8973388671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 359.00628662109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 358.8724365234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 358.82275390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 359.27520751953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 358.9097900390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 359.247802734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 358.9096984863281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 359.7646484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 358.9832763671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 359.03326416015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 359.66815185546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 358.7281188964844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 359.749755859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 359.87115478515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 359.2806701660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 359.01824951171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 358.970703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 359.4989013671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 359.1158142089844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 358.8875732421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 359.3714294433594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 358.91943359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 358.8987121582031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 358.905029296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 358.7677001953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 359.4306945800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 358.856201171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 359.9750671386719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 358.9437561035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 358.7367858886719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 359.89666748046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 359.5871887207031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 358.8722229003906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 358.9534912109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 359.083984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 359.9847106933594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 359.82568359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 359.0842590332031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 359.11431884765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 359.3425598144531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 359.3680419921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 359.1054992675781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 358.8624267578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 359.18902587890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 358.888671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 359.7483825683594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 358.847900390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 359.7384948730469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 358.70452880859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 359.1107482910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 360.1681213378906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 359.49981689453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 359.20587158203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 359.75360107421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 358.8561096191406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 359.48138427734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 359.85821533203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 359.1017150878906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 360.01861572265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 360.040283203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 359.1497497558594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 358.9883117675781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 358.7223815917969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 359.6981201171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 358.92254638671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 359.7209167480469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 358.8663635253906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 359.2726135253906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 359.2909240722656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 359.0556640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 359.0731506347656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 358.9274597167969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 358.8674011230469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 359.0245666503906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 358.8996887207031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 359.3058776855469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 361.0452880859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 358.8720397949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 358.9784240722656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 359.0592346191406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 359.09735107421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 359.3925476074219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 359.0504150390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 361.06396484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 359.3282775878906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.6111755371094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 359.0072326660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 358.8814392089844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 358.85675048828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 358.8899841308594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 358.4681091308594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 358.88470458984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 358.9815673828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 359.0192565917969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 360.140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 358.9927062988281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 358.7507629394531
############ Running episode number: 610  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 2 20.0 1029.06659875 (11.973514343585284, 10)
loss 358.96142578125
Current State,action,reward,Response time,Next State:  (10, 11.973514343585284) 3 19.0 1012.7846064 (11.786394321941378, 11)
loss 359.3128662109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 358.831787109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 359.03094482421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 358.9747619628906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 358.96600341796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 359.58880615234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 359.1073303222656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 358.9739074707031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 359.7474365234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 359.0148010253906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 360.3141174316406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 358.84161376953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 359.3854064941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 359.1077575683594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 358.89697265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 359.0586853027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 359.0550537109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 360.47186279296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 358.91552734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 359.0025329589844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 359.2623596191406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 359.39788818359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 358.7892150878906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 359.23480224609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 358.9142761230469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 358.9635314941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 358.7784423828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 358.8440246582031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 358.98681640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 358.79302978515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 359.57220458984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 358.78375244140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 359.69677734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 358.9966735839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 360.1047058105469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 358.9674377441406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.9261779785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 358.9304504394531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 358.9781494140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 358.6607666015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 358.954345703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 359.3819274902344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 359.1199035644531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 359.182861328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 358.87689208984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 359.0046691894531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 358.86749267578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 359.0232238769531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 358.8720703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 358.9066162109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 358.96807861328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 358.7464294433594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 359.3354797363281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 358.981689453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 358.8067321777344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 358.78515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 359.0621643066406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 359.07830810546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 359.0323486328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 358.86785888671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 359.0041198730469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 359.1689147949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 358.9145812988281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 358.9872131347656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 358.989990234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 359.352294921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 358.8714599609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 359.98602294921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 358.819091796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 358.91070556640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 358.9617004394531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 358.7957458496094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 358.85833740234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 358.91259765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 358.8929443359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 359.13409423828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 358.97711181640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 359.9244079589844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 359.01629638671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 358.8440246582031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 359.1756286621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 358.7640686035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 358.9450378417969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 358.9405212402344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 359.8240966796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 358.9989318847656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 359.5827941894531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.83026123046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 358.9486999511719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 358.9347229003906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 359.6728210449219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 358.9115905761719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 359.6921081542969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 359.01605224609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 358.9632568359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 358.78485107421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 358.93621826171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 358.8524475097656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 358.9579772949219
############ Running episode number: 611  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 359.0921630859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 359.0462646484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 359.0832824707031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 359.12982177734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 359.26409912109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 359.0956726074219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 359.8431091308594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 358.92266845703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 359.1320495605469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 359.0317687988281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 359.05322265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 358.93060302734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 358.9494323730469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 359.0018615722656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 358.88177490234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 359.24053955078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 358.9167785644531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 358.96502685546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 358.9378967285156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 358.7537841796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 360.1365966796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 359.7647399902344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 359.99591064453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 359.15087890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 358.9033508300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 358.89605712890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 359.515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 359.1249084472656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 358.936279296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 358.8939208984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 359.01629638671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 358.7603759765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 359.1387023925781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 358.78118896484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 359.0787658691406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 358.9737854003906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 358.7424011230469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.9626770019531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 358.8063659667969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 358.9173889160156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 359.0287170410156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 359.8241882324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 359.05126953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 359.0642395019531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 358.9246520996094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 359.3102722167969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 359.1311340332031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 359.0605163574219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 358.8713684082031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 359.0274658203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 358.8597106933594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 359.69879150390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 359.0091247558594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 358.9404602050781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 358.8639221191406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 359.5870056152344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 359.09503173828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 358.8930969238281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 359.8931579589844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 358.89154052734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 358.9078674316406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 358.9757080078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 358.90228271484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 358.8468933105469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 359.2062072753906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 359.1670837402344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 358.787353515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 359.3385009765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 359.03131103515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 358.89910888671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 359.0008850097656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 358.7380065917969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 358.92999267578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 359.0325927734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 359.0874938964844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 358.8672790527344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 358.8650207519531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 359.432861328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.8576354980469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 358.9933166503906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 358.9810791015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 358.973388671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 359.23101806640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 359.1601867675781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 358.94171142578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.8763732910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 359.04010009765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.9342346191406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 359.35205078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 358.7914733886719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 358.97161865234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 359.0113525390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 359.72808837890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 358.95770263671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 359.1272888183594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 358.8015441894531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 358.9473571777344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 358.97918701171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 359.0177307128906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 359.0429382324219
############ Running episode number: 612  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.8221130371094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 358.90289306640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 358.91748046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 359.0638122558594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 358.944580078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 358.977294921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 359.49169921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 359.04559326171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 359.07098388671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 358.784912109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 358.8839111328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 359.0080261230469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 359.7549743652344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.9205322265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 358.8404846191406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 359.0185241699219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 358.937255859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 360.0166015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 358.75335693359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 359.51043701171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 358.8987731933594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 359.1268310546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 359.00787353515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 359.1629333496094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 358.93603515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 359.3772888183594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 358.8722229003906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 359.0831604003906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 358.9793395996094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 358.8979187011719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 358.81573486328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 358.9308776855469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 358.6449279785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 359.10491943359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 359.12347412109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 358.84503173828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 360.30206298828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.961669921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 358.91619873046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 360.0859680175781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 358.8062438964844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 358.8458251953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 359.8740234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 360.0384521484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 359.41204833984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 359.0661315917969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 359.242431640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 358.89495849609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 358.7901306152344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 358.9026184082031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 358.7444763183594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 358.9365539550781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 358.68096923828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 359.8062438964844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 358.8149108886719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 359.0103454589844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 359.21826171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 360.1028137207031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 360.0113525390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 358.98779296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 359.40771484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 359.1002197265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 359.460205078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 358.8734436035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 358.70062255859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 359.25311279296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 358.9356384277344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 359.03399658203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 359.0776672363281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 358.8565368652344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 358.8787841796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 359.8288879394531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 359.92816162109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 358.8571472167969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 358.9049377441406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 359.13726806640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 358.84075927734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 358.9023132324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.99359130859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 359.24053955078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 358.6495056152344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 358.8230895996094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 358.8934020996094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 359.1723937988281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 358.8402404785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.9361877441406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 359.41680908203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 359.0826110839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 359.11834716796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 358.9275817871094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 358.87725830078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 360.04547119140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 359.0730895996094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 359.17169189453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 360.05914306640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 359.3749694824219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 358.8160400390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 358.83721923828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 359.3113708496094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 359.31085205078125
############ Running episode number: 613  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.7565002441406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 360.0009765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 358.8446350097656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 358.90032958984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 358.9625244140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 358.9344787597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 358.95208740234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 359.0191345214844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 358.70751953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 358.92877197265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 359.135986328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 358.84698486328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 358.8956298828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.88385009765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 358.8345642089844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 358.9396057128906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 359.1528625488281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 358.89678955078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 359.1455078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 358.9422607421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 358.84893798828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 359.22113037109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 359.1278381347656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 358.9736022949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 358.9779052734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 359.1508483886719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 359.0318908691406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 358.7868347167969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 359.002685546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 358.7867736816406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 359.6771240234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 358.9704284667969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 359.1061706542969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 359.0908508300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 359.9979553222656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 359.07366943359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 358.9596252441406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 359.03570556640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 360.1009216308594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 358.666259765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 358.8018798828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 359.08489990234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 359.1293029785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 358.87542724609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 359.6622619628906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 359.0207214355469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 358.901123046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 359.8773498535156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 358.8538513183594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 359.4352722167969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 359.4958190917969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 358.9601745605469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 358.9939880371094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 359.17724609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 358.95257568359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 358.6484069824219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 358.82611083984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 359.2084655761719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 359.2213439941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 358.8746643066406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 358.8457946777344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 359.3464050292969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 359.77178955078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 359.02935791015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 358.8697204589844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 358.8427734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 358.8492126464844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 359.1794128417969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 360.2417297363281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 358.9996032714844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 359.1840515136719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 359.2865295410156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 359.1107177734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 358.8997497558594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 358.8364562988281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 359.3386535644531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 358.89471435546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 359.6097412109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 359.5487976074219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 359.01885986328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 358.986572265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 358.89984130859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 358.85736083984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 359.66546630859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 359.5364074707031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.9371643066406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 358.996337890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.8514404296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 359.7557678222656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 359.03717041015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 358.7305603027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 359.0139465332031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 358.7506103515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 358.9515075683594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 359.32257080078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 358.9526062011719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 358.9971618652344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 359.0836181640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 359.78857421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 358.8316345214844
############ Running episode number: 614  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 359.4720458984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 359.095703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 359.16229248046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 359.15386962890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 358.76373291015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 359.6686096191406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 358.95233154296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 359.32122802734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 360.2806091308594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 358.8417053222656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 358.9491271972656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 358.89434814453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 358.828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 359.18182373046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 359.03216552734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 358.8681640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 358.8642883300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 359.131103515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 359.1930236816406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 358.8683776855469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 358.8121337890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 358.9443664550781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 359.0104064941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 359.28594970703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 358.98248291015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 359.74249267578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 358.7887268066406
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 0 21.0 912.494916918 (10.278181486298042, 9)
loss 358.8638916015625
Current State,action,reward,Response time,Next State:  (9, 10.278181486298042) 3 20.0 961.129617982 (10.268274366284802, 10)
loss 358.8987731933594
Current State,action,reward,Response time,Next State:  (10, 10.268274366284802) 3 19.0 922.331700166 (10.335411397720526, 11)
loss 358.8282470703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 359.6977844238281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 358.9264221191406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 358.7840576171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 359.1795959472656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 359.1591491699219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 358.9624938964844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 358.884521484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 359.1477966308594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 358.8118591308594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 359.55401611328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 358.7559509277344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 359.414306640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 359.42584228515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 358.6527404785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 358.9813537597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 359.7989807128906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 360.3403015136719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 359.2542419433594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 358.95501708984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 358.6607971191406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 359.0429992675781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 358.9532470703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 359.51531982421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 359.0426330566406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 359.1188659667969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 358.96990966796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 358.9794921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 358.8718566894531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 359.0017395019531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 359.27032470703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 359.8858947753906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 358.76861572265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 358.7416687011719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 359.0562438964844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 358.88671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 358.9994812011719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 358.8603820800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 358.6301574707031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 359.8282165527344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 358.8851013183594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 358.97406005859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 359.8216247558594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 359.9366760253906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 359.7341613769531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 358.8653869628906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 358.89715576171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 358.8909912109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 359.3558044433594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 360.16571044921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 359.7421569824219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 359.191650390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 359.4063720703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 358.8242492675781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 359.0938415527344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 359.04815673828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 359.712890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 359.1280822753906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.8752136230469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 360.13385009765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 359.021240234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 359.3345642089844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 358.72900390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 358.8143310546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 358.9823303222656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 359.84991455078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 358.84490966796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 359.1955261230469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 358.79345703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 358.9753112792969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 360.0759582519531
############ Running episode number: 615  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 359.5406188964844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 358.7636413574219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 358.99200439453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 358.9333801269531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 359.9624328613281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 358.8486022949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 359.0045166015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 359.02099609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 358.99713134765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 358.9029846191406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 359.64093017578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 358.9450988769531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 359.0059509277344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 359.0751647949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 358.9648132324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 358.9302978515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 359.0579528808594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 359.0907897949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 359.72802734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 358.9153137207031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 359.72283935546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 359.29425048828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 359.884033203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 359.1873779296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 359.9222106933594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 358.76226806640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 359.0080261230469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 359.9761047363281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 358.67498779296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 359.78887939453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 360.2831726074219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 359.9189147949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 359.740234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 359.0955810546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 358.9866638183594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 358.7922058105469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 359.7347106933594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.77813720703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 359.8017883300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 359.6265869140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 359.96661376953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 359.869873046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 359.31011962890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 360.8583068847656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 359.2724304199219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 359.1024169921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 359.0276794433594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 358.9826965332031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 358.9958801269531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 359.2810974121094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 359.3343811035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 359.2392578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 358.81951904296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 359.35150146484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 360.0745849609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 360.2588806152344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 358.9228820800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 358.8750915527344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 359.7648620605469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 359.0385437011719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 358.8797912597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 359.00213623046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 358.8704528808594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 359.29803466796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 358.9115295410156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 358.8678894042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 359.76910400390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 359.15264892578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 359.05084228515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 359.16204833984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 358.9978942871094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 358.7205810546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 359.03155517578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 359.0608825683594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 359.0932922363281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 359.0666198730469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 358.7984619140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 360.16949462890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.8162841796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 358.9263916015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 359.0507507324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 359.4192199707031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 358.6434020996094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 358.8592224121094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 359.1294250488281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.502685546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 359.255859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.8458557128906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 359.0142822265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 359.50848388671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 358.97296142578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 358.8544006347656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 359.3224182128906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 358.70086669921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 358.95404052734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 358.7121887207031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 359.10113525390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 358.8721008300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 359.3050537109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 358.8970031738281
############ Running episode number: 616  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 359.1860656738281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 358.94622802734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 359.2010192871094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 359.0543518066406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 358.876220703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 359.3181457519531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 358.6471862792969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 359.6138610839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 359.2157287597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 358.9386901855469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 358.82818603515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 359.284423828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 359.0574951171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 359.8543395996094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 358.9169921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 359.03521728515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 359.4137268066406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 359.0212707519531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 358.8885498046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 359.03741455078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 359.07122802734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 359.8663024902344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 359.3631286621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 359.9304504394531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 359.0645446777344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 359.28826904296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 359.22393798828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 359.0040283203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 358.8828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 358.9640197753906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 359.3967590332031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 359.0946960449219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 358.8799743652344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 358.81463623046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 359.4434509277344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 358.80413818359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 359.8370361328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.8744812011719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 359.18084716796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 359.04229736328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 358.92108154296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 359.0600891113281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 358.9373474121094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 358.8150939941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 358.8089599609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 358.86651611328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 358.8385009765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 359.52337646484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 358.9818115234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 359.2424621582031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 359.0761413574219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 359.2758483886719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 358.78350830078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 359.36773681640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 358.88751220703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 359.0318908691406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 359.1133117675781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 360.1702575683594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 359.8904724121094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 358.8980407714844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 359.3464050292969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 358.85498046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 358.8198547363281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 359.7631530761719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 359.31817626953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 359.4178161621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 359.0096130371094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 359.15667724609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 358.8830261230469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 358.91192626953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 358.976318359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 359.0509033203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 358.7799072265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 359.6461486816406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 358.947021484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 359.7557067871094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 359.2198791503906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 359.0694274902344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.7557067871094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 358.950927734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 359.0503845214844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 358.7596435546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 359.10186767578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 359.21710205078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 359.8987731933594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.74261474609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 359.20465087890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 360.0431213378906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.9624328613281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 358.8341064453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 359.1299133300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 359.14434814453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 359.1878356933594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 359.0555419921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 359.1575012207031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 358.9222717285156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 358.6722717285156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 359.06011962890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 358.93157958984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 358.55908203125
############ Running episode number: 617  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.96258544921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 358.93914794921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 358.9370422363281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 359.2654724121094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 358.9620666503906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 359.0336608886719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 358.7400817871094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 358.8924560546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 359.00299072265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 358.83636474609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 358.74530029296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 359.82794189453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 359.36065673828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 359.2464599609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 359.424072265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 359.2325744628906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 358.79638671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 359.1146240234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 359.748046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 359.0119323730469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 358.8868103027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 358.8842468261719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 358.9335632324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 359.0122375488281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 359.6086120605469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 358.9019775390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 358.95428466796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 358.9557189941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 358.98443603515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 359.1692810058594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 358.83837890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 359.33026123046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 359.0048828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 358.9469299316406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 358.9272766113281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 359.3869934082031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 359.6993103027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.9814758300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 359.75146484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 359.0185241699219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 358.9960632324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 358.9610290527344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 359.1798400878906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 359.4195556640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 358.926513671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 359.36724853515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 358.9914855957031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 358.6756591796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 358.88677978515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 359.9773254394531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 358.8966064453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 359.0740661621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 358.93853759765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 358.7250061035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 359.10430908203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 359.2151184082031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 359.5537414550781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 359.1253662109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 359.0802917480469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 360.065673828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 358.8437194824219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 359.1329040527344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 360.08349609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 359.06982421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 358.9186096191406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 359.08697509765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 358.8695068359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 359.40216064453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 358.9989013671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 359.01129150390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 359.3321838378906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 358.98236083984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 358.7959899902344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 359.3473815917969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 359.1057434082031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 359.056396484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 358.9615783691406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 358.86822509765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.7148742675781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 359.04022216796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 358.9920349121094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 359.0013732910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 359.11785888671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 359.0555114746094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 359.4820556640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.9094543457031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 359.0277404785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 359.1006164550781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 360.2872314453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 358.8749694824219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 360.08795166015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 359.2772521972656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 359.1412048339844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 359.67535400390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 358.73956298828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 359.4296569824219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 358.95098876953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 359.4781494140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 358.9333801269531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 359.4398193359375
############ Running episode number: 618  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 359.3044128417969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 358.84722900390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 359.2478332519531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 358.89129638671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 358.7167053222656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 358.8074035644531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 359.96282958984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 358.9490051269531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 359.27984619140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 359.72418212890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 358.95477294921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 359.8384094238281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 359.1913146972656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 359.183349609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 359.52880859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 358.994384765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 358.8669128417969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 358.8519592285156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 358.9964904785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 359.037841796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 359.19061279296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 358.9698486328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 358.8364562988281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 358.6531066894531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 358.7884826660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 359.0469055175781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 358.9320983886719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 359.35845947265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 359.0736083984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 358.949951171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 360.3450012207031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 358.79730224609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 359.16302490234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 358.9407043457031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 358.9982604980469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 358.6055603027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 359.1563720703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.8702392578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 359.043701171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 359.92620849609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 358.86822509765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 359.2057800292969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 358.85064697265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 359.42889404296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 358.9496765136719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 359.3030700683594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 358.6239013671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 360.0611877441406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 358.9305114746094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 359.4488220214844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 359.3241882324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 359.08782958984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 358.83172607421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 359.0623474121094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 359.20684814453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 358.8199462890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 359.07672119140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 358.92999267578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 358.97723388671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 358.8531799316406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 358.9936828613281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 359.04022216796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 359.00994873046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 359.55352783203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 358.9751281738281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 358.8490905761719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 358.9468688964844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 359.0932922363281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 359.1949157714844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 358.9416198730469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 358.79779052734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 358.9925537109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 359.26226806640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 358.93310546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 358.8090515136719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 358.92669677734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 359.043701171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 358.8589172363281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.9071350097656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 359.66888427734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 358.8825988769531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 358.8963317871094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 358.8891296386719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 358.7450256347656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 358.7239990234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.8325500488281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 358.90020751953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 359.1159973144531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 359.3533020019531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 358.90625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 358.874755859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 359.8009948730469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 359.0874328613281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 358.90313720703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 359.0343017578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 358.8912048339844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 358.76953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 359.0404052734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 358.8913879394531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 359.0389404296875
############ Running episode number: 619  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.7298583984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 358.7584228515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 359.0852966308594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 359.3783874511719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 358.9048156738281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 359.20562744140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 358.99505615234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 358.8485412597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 358.861328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 358.81500244140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 359.0242004394531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 358.9846496582031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 358.9192810058594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.9786376953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 358.9666748046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 359.0084228515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 359.1701354980469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 359.3360900878906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 359.3709716796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 359.474365234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 358.9239196777344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 358.7491149902344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 359.38470458984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 358.9721374511719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 359.0766296386719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 359.4344482421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 358.76275634765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 359.33282470703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 358.8245849609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 359.4164733886719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 360.187255859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 358.94549560546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 358.80438232421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 358.90435791015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 358.8826904296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 358.9860534667969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 359.36578369140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.7294616699219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 359.4404296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 359.2754211425781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 358.9692687988281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 360.39703369140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 358.7575378417969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 358.9853515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 358.96673583984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 358.85888671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 358.990234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 359.0257263183594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 359.09320068359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 359.1101379394531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 358.91387939453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 358.6725769042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 359.3609924316406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 359.19647216796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 358.9844665527344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 358.9075622558594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 359.160400390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 358.86962890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 358.7577209472656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 358.87615966796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 359.05517578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 359.91278076171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 359.7503967285156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 358.9127197265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 359.14501953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 358.8380126953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 359.7874450683594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 359.1324157714844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 360.0654296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 359.9273681640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 359.5506286621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 359.0178527832031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 359.03985595703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 358.9485778808594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 358.9072570800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 358.9436340332031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 359.15045166015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 358.96826171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.9453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 359.0931396484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 358.9172668457031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 358.9020080566406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 358.7373046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 358.8769836425781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 359.34381103515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 359.3224182128906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 359.3403015136719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 359.1955871582031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 359.0790100097656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 359.32525634765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 359.5180358886719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 359.3013000488281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 358.8713073730469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 360.01275634765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 359.0912170410156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 358.8346862792969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 358.9832458496094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 358.81634521484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 358.94476318359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 359.1706237792969
############ Running episode number: 620  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.9438781738281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 359.1063232421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 358.7720031738281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 359.6402893066406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 359.0810546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 358.78466796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 358.8916320800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 359.04913330078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 358.91253662109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 358.87921142578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 359.77947998046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 359.1566162109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 358.8828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 359.11077880859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 359.4837646484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 359.9424743652344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 358.8764343261719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 359.0616455078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 359.64654541015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 359.286376953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 359.0326232910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 358.865234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 359.08013916015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 358.9706726074219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 359.0726013183594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 359.09320068359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 358.9126892089844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 359.2553405761719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 359.2895202636719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 358.90411376953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 358.9146728515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 358.9619445800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 358.9551086425781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 358.8780517578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 358.7438659667969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 359.1797180175781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 359.00360107421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 359.07440185546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 358.8688659667969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 359.9461669921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 358.97662353515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 358.52435302734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 358.7899475097656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 358.7046813964844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 359.0722351074219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 359.0469055175781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 358.970947265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 359.06829833984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 359.95513916015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 359.05450439453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 358.7716369628906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 359.39208984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 359.1095886230469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 359.0793762207031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 358.87969970703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 359.01043701171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 359.0688171386719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 359.926513671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 359.1596984863281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 358.9989013671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 359.1605529785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 358.8539733886719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 358.91412353515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 359.55340576171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 358.97064208984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 359.0863342285156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 358.9859313964844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 359.07073974609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 358.9458923339844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 359.0323486328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 359.0104675292969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 358.97607421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 359.4835205078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 358.9462890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 359.1570129394531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 358.9099426269531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 358.8123474121094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 358.8895568847656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 359.017333984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 358.9634094238281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 359.0904846191406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 359.1225891113281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 359.1288757324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 359.0068664550781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 358.8419189453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 359.0323181152344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 359.1070251464844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.6600341796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 359.05474853515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 359.0696105957031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 358.8144226074219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 358.8287658691406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 358.6279602050781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 358.9497375488281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 359.34368896484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 358.9881286621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 358.784423828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 359.1021423339844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 359.7562561035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 359.0613708496094
############ Running episode number: 621  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.98870849609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 358.927734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 358.5469055175781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 358.7911376953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 358.8433837890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 359.06536865234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 358.80218505859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 359.14093017578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 359.0480651855469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 359.01611328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 359.7524108886719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 359.1073303222656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 359.3498840332031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 359.7601623535156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 359.0833740234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 358.96783447265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 358.9335021972656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 358.8669738769531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 358.7074279785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 358.841552734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 359.191162109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 359.2161865234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 358.88800048828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 358.8721923828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 358.86883544921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 358.9789733886719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 359.1528625488281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 359.0505676269531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 358.9913024902344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 358.8109436035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 358.87115478515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 358.9495849609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 358.8592224121094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 358.8529968261719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 358.6595764160156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 359.0286560058594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 359.6617736816406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.8463134765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 359.13934326171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 358.68902587890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 359.1783752441406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 358.91552734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 359.0915222167969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 358.8626403808594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 358.72357177734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 358.9993896484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 358.9215087890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 359.0699157714844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 358.9726867675781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 358.8598937988281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 359.0248718261719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 358.98468017578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 358.8705139160156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 358.8236999511719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 358.8079528808594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 358.9748840332031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 359.04815673828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 359.71600341796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 359.59149169921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 358.9258728027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 358.94140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 359.0027160644531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 359.0704650878906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 358.9465637207031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 358.85400390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 359.07769775390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 359.7980041503906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 359.0555419921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 358.9087219238281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 359.15570068359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 358.7689514160156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 358.9461364746094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 358.9156494140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 358.9477233886719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 359.779052734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 359.0677795410156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 359.1622009277344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 358.9820556640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.9912414550781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 358.8292541503906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 359.1085510253906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 359.1394958496094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 359.014404296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 358.9805603027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 358.9400939941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 359.4646911621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 359.13970947265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.9530944824219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.89337158203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 359.9229431152344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 359.10009765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 358.88006591796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 359.0058288574219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 359.1897277832031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 359.1539611816406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 358.994140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 358.95782470703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 359.1038818359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 358.89556884765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 359.1062316894531
############ Running episode number: 622  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 359.1791076660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 358.8667907714844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 358.9337158203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 358.7641906738281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 358.9375305175781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 358.9704895019531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 358.8540344238281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 358.828857421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 359.08624267578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 358.85791015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 359.0263671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 358.77569580078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 358.9012756347656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.9585266113281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 358.89984130859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 359.0872802734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 358.8894348144531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 359.1209411621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 358.6989440917969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 358.9311828613281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 358.9317626953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 358.7331237792969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 358.9279479980469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 360.20928955078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 358.9185485839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 358.742919921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 358.9194030761719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 359.1184387207031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 358.7947998046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 359.12249755859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 359.78717041015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 358.9532470703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 358.98870849609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 359.0494689941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 358.84661865234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 358.9173278808594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 359.27093505859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.8098449707031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 359.9036560058594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 358.9643249511719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 358.9752502441406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 359.0619201660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 358.7877502441406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 358.78973388671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 358.83941650390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 359.0348815917969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 358.7905578613281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 359.0505676269531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 359.1867980957031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 358.80029296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 358.79925537109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 358.9292907714844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 358.9322204589844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 358.9616394042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 358.88482666015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 358.984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 358.6415710449219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 358.9975280761719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 359.0066833496094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 358.836669921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 358.91827392578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 359.0242004394531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 358.80419921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 358.8481750488281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 358.9789123535156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 358.78839111328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 359.08392333984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 359.74517822265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 359.2013244628906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 359.0943908691406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 358.9697570800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 358.9119567871094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 358.811767578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 359.0942077636719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 359.1517333984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 358.8207702636719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 359.11328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 358.96673583984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.8778991699219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 359.2796936035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 358.8134460449219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 359.0047302246094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 359.01629638671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 358.97021484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 359.1044006347656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.9820251464844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 359.1753845214844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 359.2632751464844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.8594970703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 359.3388671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 359.1515808105469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 358.75933837890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 359.11590576171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 358.7113342285156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 359.8498840332031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 359.1487731933594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 359.05303955078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 359.0805969238281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 359.19598388671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 359.2252502441406
############ Running episode number: 623  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 359.04510498046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 358.7769470214844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 359.8594970703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 358.9904479980469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 359.17352294921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 358.8989562988281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 358.97283935546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 359.368896484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 359.1515197753906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 358.9907531738281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 359.3122253417969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 358.8836669921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 359.0498352050781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 359.10394287109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 359.10833740234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 358.853759765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 359.008056640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 358.8641052246094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 358.9592590332031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 360.23870849609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 358.8169250488281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 358.93475341796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 359.744873046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 359.56463623046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 358.8448791503906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 359.1942443847656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 358.86749267578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 358.8087158203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 359.0108642578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 358.93792724609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 358.9445495605469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 358.9658203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 359.1938171386719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 358.8890075683594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 358.8231506347656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 359.1029968261719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 358.7967224121094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 359.0259094238281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 359.1329650878906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 359.0450439453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 358.9359130859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 359.0831298828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 358.9646301269531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 358.889892578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 358.9756164550781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 358.63787841796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 359.03271484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 358.8858337402344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 358.97027587890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 359.0592346191406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 358.8100280761719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 359.89337158203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 359.0439147949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 359.0530700683594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 358.8975524902344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 359.4290771484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 359.18963623046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 358.8712463378906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 358.84332275390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 358.97369384765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 360.2322082519531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 358.8595886230469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 358.83935546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 358.7873229980469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 359.1616516113281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 359.001708984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 358.88720703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 359.0328369140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 358.8372497558594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 359.14202880859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 359.88592529296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 360.0417785644531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 358.8027038574219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 359.041015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 359.0800476074219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 358.7214660644531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 359.18084716796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 358.7890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 359.1092834472656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 358.9317626953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 359.222900390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 359.13629150390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 359.28399658203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 358.81170654296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 359.2335205078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.8421325683594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 358.9669494628906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 360.1130065917969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.87310791015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 358.7947692871094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 358.9876708984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 359.01141357421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 358.83062744140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 358.97711181640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 359.82928466796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 358.9186706542969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 360.029052734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 358.84149169921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 359.34832763671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 359.31121826171875
############ Running episode number: 624  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 359.2275695800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 359.6387939453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 358.777587890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 358.8298645019531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 358.7853088378906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 358.9310607910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 358.9470520019531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 358.9765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 358.8622741699219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 358.99273681640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 358.95062255859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 359.0289611816406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 359.6761474609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 359.0548095703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 359.26153564453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 359.746826171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 358.7755432128906
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 1 20.0 925.789969445 (10.489125480251131, 10)
loss 359.3653259277344
Current State,action,reward,Response time,Next State:  (10, 10.489125480251131) 3 19.0 934.046546974 (10.448897752470936, 11)
loss 358.8706359863281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 358.9210205078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 359.9855651855469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 358.9242858886719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 358.9966735839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 358.84210205078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 359.1321105957031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 358.7773742675781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 359.9939270019531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 359.01568603515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 359.05255126953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 359.2008361816406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 359.0834045410156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 358.9508972167969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 358.9453430175781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 359.1201477050781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 359.1065673828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 359.1893615722656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 359.1427917480469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 359.6080322265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 358.9540710449219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 358.9272766113281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 358.7280578613281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 359.4088439941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 358.94195556640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 358.8197937011719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 358.86346435546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 358.8570251464844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 359.0556335449219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 359.1263732910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 358.8128967285156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 358.8523864746094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 358.98577880859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 358.904052734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 358.75616455078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 358.8048400878906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 358.8999938964844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 359.0506896972656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 358.92791748046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 358.9371032714844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 358.9494323730469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 358.833740234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 359.21710205078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 358.8753356933594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 359.21038818359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 359.0338439941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 359.0603942871094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 359.2022705078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 358.8946228027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 359.1728210449219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 358.9068603515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 358.9801940917969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 358.87567138671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 358.872314453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 358.9685363769531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 359.1034240722656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 359.0431823730469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 358.84796142578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 359.0483703613281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 358.98394775390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 359.0831298828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 358.9923400878906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 358.9514465332031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 359.01104736328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 358.8832092285156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 358.9886779785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 359.04217529296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 359.0008544921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 359.0039367675781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 359.0423889160156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.9915771484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 358.9140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 358.7132263183594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 358.9291687011719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 359.1463928222656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 358.965087890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 358.9898681640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 358.59124755859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 359.1653137207031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 358.6668701171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 359.1983642578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 359.0085754394531
############ Running episode number: 625  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.9728088378906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 358.9815979003906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 359.2111511230469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 359.0138854980469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 358.90142822265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 358.98895263671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 358.97711181640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 358.9737243652344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 358.86279296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 358.92041015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 358.974853515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 358.7783508300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 359.1987609863281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.94873046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 358.9136657714844
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 4 19.0 927.560809977 (10.552868829802469, 11)
loss 358.9579772949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 359.4430236816406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 359.41668701171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 358.93017578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 359.4991760253906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 359.0743713378906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 358.9835510253906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 359.2359924316406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 358.87298583984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 358.9549865722656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 358.87420654296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 358.9485778808594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 358.9226379394531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 359.1461486816406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 358.9886779785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 359.05377197265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 358.98406982421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 359.0283508300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 358.8334655761719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 358.83502197265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 358.8262023925781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 358.7872314453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.9045104980469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 358.88507080078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 358.9962158203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 358.8689880371094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 359.25115966796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 358.91796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 359.1508483886719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 359.4097595214844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 358.779541015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 358.9354553222656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 358.98919677734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 358.74615478515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 359.0770263671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 358.892333984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 358.9513244628906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 359.13427734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 358.797607421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 358.7685241699219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 359.0125427246094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 358.9588317871094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 359.2208251953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 358.9443054199219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 358.9328918457031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 358.88031005859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 358.9952087402344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 358.9608459472656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 359.022705078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 358.9223327636719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 359.2119445800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 358.9892578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 359.11492919921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 359.4147644042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 358.9281005859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 358.9056701660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 358.94146728515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 359.2644958496094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 358.81134033203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 358.9340515136719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 358.9950256347656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 358.91571044921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 358.863037109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.8641052246094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 358.7895812988281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 358.94921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 358.9333801269531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 358.97003173828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 359.119873046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 359.3480529785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.9201965332031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 359.1488952636719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.7893981933594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 359.3656005859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 358.9542541503906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 358.94830322265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 359.15606689453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 358.9560852050781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 358.6361389160156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 359.2646789550781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 358.7314147949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 358.8181457519531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 358.9928283691406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 358.7052001953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 358.98480224609375
############ Running episode number: 626  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.98907470703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 359.2077331542969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 359.0108337402344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 358.9877624511719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 358.85565185546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 359.0657653808594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 358.89569091796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 359.4208984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 358.903076171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 358.89215087890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 359.2524719238281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 359.06268310546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 359.0466003417969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.6466369628906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 358.7610778808594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 358.88299560546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 359.09088134765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 358.8516845703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 358.88751220703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 359.0729064941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 358.9683532714844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 359.1257019042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 359.0118713378906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 358.9009704589844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 358.70318603515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 358.8704833984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 359.2848815917969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 358.87164306640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 358.9494934082031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 358.9702453613281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 359.1830139160156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 358.7994384765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 359.3680419921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 359.0385437011719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 358.9974670410156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 358.87628173828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 359.175048828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.8783264160156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 359.05450439453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 358.89691162109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 359.0227966308594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 358.8283996582031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 359.06011962890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 358.84320068359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 359.0531921386719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 358.98809814453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 358.92083740234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 359.0517272949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 358.7992858886719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 358.9370422363281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 358.7383728027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 358.9205017089844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 359.1322021484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 358.8036804199219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 359.0058288574219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 359.3619384765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 358.8536682128906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 358.9921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 359.0819396972656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 358.91912841796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 358.9875793457031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 358.6937561035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 359.32623291015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 359.2715759277344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 358.9048767089844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 358.9839782714844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 358.80670166015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 358.85504150390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 359.13262939453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 358.9211120605469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 359.0763244628906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 358.94891357421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 359.0273132324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 359.3620910644531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 358.9179382324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 358.9523620605469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 358.8049621582031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 358.6695251464844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 359.1087341308594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 358.90374755859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 358.7251892089844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 359.07220458984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 358.9815673828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 358.8180847167969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 359.02935791015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.9743347167969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 358.785888671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.9629821777344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.8992919921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 358.86102294921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 358.804443359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 359.20147705078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 358.9322509765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 359.0680236816406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 359.076416015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 358.88323974609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 358.8444519042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 359.06634521484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 358.9659423828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 359.0179138183594
############ Running episode number: 627  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.942626953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 358.97991943359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 358.9559020996094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 358.9189147949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 359.3284912109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 359.0398864746094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 358.90960693359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 358.9835510253906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 358.7997131347656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 359.0897216796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 359.07916259765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 359.1724853515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 358.8633728027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 359.370849609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 359.22430419921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 358.75079345703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 358.77947998046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 359.08160400390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 358.9611511230469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 358.9249267578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 358.9467468261719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 359.1083068847656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 358.9426574707031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 359.01068115234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 358.9260559082031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 358.7203369140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 358.91326904296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 358.9312744140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 359.158447265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 358.92376708984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 359.1455383300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 358.9844970703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 358.9412841796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 359.18316650390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 358.81512451171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 358.90826416015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 359.0215148925781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 359.07080078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 359.0214538574219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 358.8793029785156
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 4 19.0 919.032945938 (10.546025383098053, 11)
loss 358.9918212890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 358.9628601074219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 359.0018615722656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 358.7447814941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 359.2248840332031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 358.9608459472656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 358.8670654296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 358.8784484863281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 358.9154052734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 358.72509765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 358.98370361328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 358.8388977050781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 359.19195556640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 358.94146728515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 358.9175109863281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 358.8636779785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 358.8115234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 358.9714050292969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 358.867431640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 359.0398864746094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 359.0118103027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 359.1185302734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 358.9310607910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 358.80389404296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 358.9200134277344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 359.001708984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 359.0607604980469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 359.097412109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 359.00225830078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 358.88330078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 359.07366943359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 359.1524658203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 359.34381103515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 359.1380615234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 358.9058532714844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 358.6431884765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 359.2203063964844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 358.9204406738281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.9815979003906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 358.80328369140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 359.1105041503906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 359.0960693359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 358.8453369140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 359.1611633300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 359.0173034667969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.4716491699219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 359.2532043457031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.99884033203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.992431640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 358.9440612792969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 359.05078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 359.04931640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 358.7748718261719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 358.89410400390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 359.2784118652344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 359.0060119628906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 358.88995361328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 359.0446472167969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 358.919189453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 358.9564514160156
############ Running episode number: 628  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.96759033203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 358.8836669921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 358.8288269042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 359.3529052734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 358.73150634765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 358.97418212890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 359.0679931640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 358.8782653808594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 358.8192443847656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 358.93109130859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 359.3125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 358.7117919921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 359.0008544921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.85064697265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 359.0856018066406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 358.9607849121094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 358.7684631347656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 359.53289794921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 359.0937194824219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 358.83148193359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 358.8337707519531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 359.05792236328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 359.184814453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 358.7838439941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 358.93927001953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 358.74517822265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 358.9195861816406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 359.059814453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 359.01751708984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 358.9432678222656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 359.0530700683594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 359.1026611328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 358.8946533203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 358.94110107421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 358.7217712402344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 358.917236328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 359.2005920410156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.8516845703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 359.06549072265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 358.8526916503906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 358.949951171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 359.0658264160156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 358.9284973144531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 358.8941650390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 358.934814453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 359.32470703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 359.1225891113281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 358.9869689941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 359.31597900390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 359.0047607421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 359.40045166015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 359.029296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 358.85552978515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 358.9403991699219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 359.1885070800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 358.72943115234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 358.8686218261719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 358.822265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 358.93133544921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 358.7837219238281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 358.91015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 359.0057067871094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 358.93310546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 359.0511779785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 358.8877868652344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 358.70721435546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 359.1197814941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 358.9909362792969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 358.96783447265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 358.764892578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 358.9725646972656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 359.4693603515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 358.8664855957031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 358.8489990234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 359.4726257324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 359.1344299316406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 359.1810607910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 358.8111877441406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.9082946777344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 359.0586242675781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 358.8509521484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 359.1680603027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 358.8369445800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 358.8863525390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 359.49310302734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.9143371582031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 359.33648681640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.82904052734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.7801208496094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 358.9591369628906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 358.9479675292969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 359.142578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 359.136962890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 358.9671630859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 358.97125244140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 358.9483337402344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 358.9850769042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 358.9160461425781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 359.4247131347656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 359.16693115234375
############ Running episode number: 629  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.8449401855469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 359.0470886230469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 358.8662109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 358.8382568359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 359.0382995605469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 359.2337341308594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 358.8966979980469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 359.0760803222656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 359.08294677734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 358.909912109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 359.1156311035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 358.9115905761719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 358.7669677734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.94097900390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 358.98052978515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 358.8299560546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 359.1103210449219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 358.9272766113281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 358.9662780761719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 358.86871337890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 359.42376708984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 359.18701171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 358.864013671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 358.8547058105469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 359.1837463378906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 358.9678955078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 358.738037109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 359.1267395019531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 359.00164794921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 358.7572326660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 358.96435546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 359.2416076660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 359.14111328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 358.9085998535156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 358.7710266113281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 359.29010009765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 358.83905029296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 359.0328369140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 358.9491271972656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 359.12640380859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 358.94439697265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 359.0106506347656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 358.83160400390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 359.00372314453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 359.10955810546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 359.0103454589844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 359.458984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 359.09173583984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 359.5345458984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 359.032958984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 358.65106201171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 358.9993896484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 358.9053649902344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 358.9806823730469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 359.0391845703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 358.8499450683594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 358.7393798828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 358.9320983886719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 359.4194030761719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 359.2911682128906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 359.361572265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 358.7104797363281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 359.01165771484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 358.9726867675781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 358.83819580078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 358.7033386230469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 358.94146728515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 359.06988525390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 359.04083251953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 358.848876953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 359.04925537109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 359.0953063964844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 358.7455749511719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 358.75537109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 358.98193359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 358.6806945800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 359.43817138671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 359.05816650390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.8204040527344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 359.1162109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 359.01483154296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 359.0661315917969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 358.86663818359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 359.0893249511719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 358.8171081542969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 359.0771789550781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 358.7355041503906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.9700927734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.9593200683594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 358.8758239746094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 358.8481750488281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 358.91229248046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 358.8109436035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 358.72503662109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 359.0556640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 358.888427734375
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 1 20.0 1248.87152463 (16.836383524612351, 10)
loss 358.9034118652344
Current State,action,reward,Response time,Next State:  (10, 16.836383524612351) 3 19.0 1270.73108663 (16.845818065953559, 11)
loss 358.950439453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 359.0151062011719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 359.0158996582031
############ Running episode number: 630  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 359.4023132324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 358.7888488769531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 358.81195068359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 358.95867919921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 358.6905212402344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 359.0032043457031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 358.89093017578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 358.9129943847656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 358.9149475097656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 359.26873779296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 358.8668518066406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 358.7514953613281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 358.68231201171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.9513854980469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 358.8242492675781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 358.8128356933594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 359.148193359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 359.0911865234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 358.6012878417969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 358.9198913574219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 359.3277893066406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 359.20904541015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 358.9579162597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 358.8678894042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 359.3761291503906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 358.7692565917969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 359.41680908203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 358.93109130859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 358.860595703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 359.4088439941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 358.9404602050781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 359.0585021972656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 359.4300842285156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 358.924072265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 359.03302001953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 359.19586181640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 359.108154296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 359.0385437011719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 359.0363464355469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 359.4782409667969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 359.050537109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 358.82098388671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 359.1073913574219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 358.9489440917969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 358.9721374511719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 359.00030517578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 359.02178955078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 358.7969970703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 359.0552673339844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 359.44915771484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 358.9773254394531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 358.9101867675781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 359.1459045410156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 359.3102111816406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 358.9538879394531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 358.6920166015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 358.75982666015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 358.8552551269531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 358.8720703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 359.55133056640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 359.0514221191406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 358.9540710449219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 358.6632080078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 358.94622802734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 359.00921630859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 358.8300476074219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 359.32275390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 358.970703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 359.13287353515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 358.9391784667969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 358.9710998535156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 359.01849365234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 358.95611572265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 359.422119140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 358.9954833984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 359.0220947265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 358.9840393066406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 359.0248718261719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.94000244140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 358.7832336425781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 358.76568603515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 359.37567138671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 359.25518798828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 358.9533386230469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 358.88238525390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.9107360839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 358.8914794921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 359.23974609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.8575134277344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 358.887451171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 358.8365173339844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 358.9559631347656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 358.8580322265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 359.0829772949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 358.9176025390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 359.0065002441406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 358.8846130371094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 359.0036926269531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 358.882568359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 358.812744140625
############ Running episode number: 631  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.8000793457031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 358.6695251464844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 359.0052490234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 358.99127197265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 359.4905090332031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 359.02490234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 359.0413818359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 359.1754150390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 359.5303649902344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 359.3810119628906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 359.2487487792969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 359.0434265136719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 359.4448547363281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 359.1313171386719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 358.7593688964844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 358.8757019042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 358.8742370605469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 358.87884521484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 359.4400634765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 359.0119934082031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 358.95013427734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 359.12939453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 359.1749572753906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 359.0184326171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 358.9413757324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 358.79644775390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 359.0467224121094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 358.99560546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 359.3919677734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 359.04180908203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 358.932861328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 358.7843933105469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 359.2542419433594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 358.9027099609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 359.3427429199219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 359.35662841796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 358.9577331542969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 359.0751037597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 358.983154296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 358.87762451171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 358.7396240234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 358.9600830078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 358.8759460449219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 358.7327575683594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 358.9082336425781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 358.77679443359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 359.58624267578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 359.1202087402344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 358.9089660644531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 359.3155822753906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 358.9827575683594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 358.92242431640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 358.80316162109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 358.82366943359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 359.5188903808594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 358.77777099609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 359.3223571777344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 358.87225341796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 358.81243896484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 358.9636535644531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 358.87890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 358.85357666015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 358.888427734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 358.94342041015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 358.910400390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 359.0226745605469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 359.2288513183594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 358.9788818359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 358.6468811035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 359.074951171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 358.9627380371094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 358.9235534667969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 359.10791015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 358.8397521972656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 358.7742614746094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 359.46710205078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 358.8950500488281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 359.4642639160156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 359.0614318847656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 358.8829040527344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 359.9163818359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 358.763671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 359.0928039550781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 359.2974548339844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 358.8598327636719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.9722595214844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 358.8006286621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.9429626464844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.8539733886719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 358.9753723144531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 359.5643310546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 358.9353942871094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 358.87066650390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 359.5163879394531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 359.1956481933594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 358.9562072753906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 359.04541015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 358.94317626953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 359.072265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 358.9737548828125
############ Running episode number: 632  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.978515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 359.00140380859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 359.365478515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 359.01690673828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 358.85968017578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 359.3462219238281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 359.05615234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 359.2974548339844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 358.8753967285156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 358.908203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 358.9009094238281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 359.00439453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 359.2644958496094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.943359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 358.9765930175781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 359.0967712402344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 359.5715637207031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 358.945068359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 359.01458740234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 358.98699951171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 358.5674133300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 358.9798278808594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 359.3094787597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 359.2294921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 359.1643371582031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 359.1875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 358.8375244140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 358.9183349609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 358.8773193359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 359.4620666503906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 358.7759094238281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 358.7683410644531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 358.8094787597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 358.9769287109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 358.9141540527344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 358.89825439453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 358.91522216796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 359.2727966308594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 358.90594482421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 358.86297607421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 358.8959045410156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 359.0370788574219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 358.97265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 358.951904296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 358.9344177246094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 359.8641662597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 359.1860046386719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 359.17022705078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 358.86785888671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 358.9178161621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 358.7791442871094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 359.1639709472656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 358.7994689941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 358.95880126953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 359.0689392089844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 358.96649169921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 358.94110107421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 358.9352111816406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 359.5970458984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 358.87017822265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 358.7928161621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 359.0780029296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 359.0909423828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 359.35540771484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 359.0368957519531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 359.38214111328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 358.89129638671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 359.1332702636719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 358.9925231933594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 358.93035888671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 358.85552978515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 358.95404052734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 358.9059753417969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 358.82476806640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 359.11981201171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 358.9495544433594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 359.135009765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 359.5303649902344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.9801330566406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 359.14471435546875
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 1 20.0 1210.97093797 (15.828704162850809, 10)
loss 359.03857421875
Current State,action,reward,Response time,Next State:  (10, 15.828704162850809) 3 19.0 1217.27964986 (15.550833128512703, 11)
loss 358.85662841796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 359.07122802734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 359.4037170410156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 359.35748291015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.9559326171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 359.3779296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.89453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.7615661621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 359.0382385253906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 358.95220947265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 359.56939697265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 358.6731872558594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 358.82537841796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 358.85626220703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 358.9140930175781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 359.0998840332031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 358.80987548828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 359.0243225097656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 358.87908935546875
############ Running episode number: 633  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.7964172363281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 359.03509521484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 359.4439697265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 359.0784912109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 359.004638671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 359.4766540527344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 358.7782287597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 359.357421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 358.8192138671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 358.9808044433594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 359.0489807128906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 359.15496826171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 359.4408264160156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 359.0323181152344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 359.1717224121094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 359.0667724609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 359.0296325683594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 358.674560546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 359.3710632324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 358.9600524902344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 358.8349304199219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 358.901611328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 358.8868103027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 359.2663269042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 358.9005126953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 359.2076416015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 359.44232177734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 359.0422058105469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 358.8352355957031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 359.0972900390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 359.0323181152344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 358.77294921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 359.6100769042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 359.0865478515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 358.8136901855469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 359.8468017578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 359.1224365234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.8521423339844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 359.0255126953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 359.4123229980469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 358.88763427734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 358.8291931152344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 358.7418518066406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 359.3216247558594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 358.96337890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 358.99163818359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 358.8623046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 358.9268493652344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 359.52972412109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 359.0523986816406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 359.0456237792969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 358.894287109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 358.82818603515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 358.9168395996094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 359.3611145019531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 359.34478759765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 358.817138671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 359.10894775390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 358.8028259277344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 359.11199951171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 359.0333557128906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 359.3326416015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 359.0633544921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 358.92333984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 358.9327697753906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 358.9561767578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 359.0141296386719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 358.92645263671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 358.9268798828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 359.2032470703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 358.9541931152344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 359.1314697265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 358.8360900878906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 359.53369140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 358.7787780761719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 358.8910827636719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 359.62939453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 358.9216003417969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.8873291015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 358.85028076171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 359.1954345703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 358.91400146484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 359.36181640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 359.1846923828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 359.238037109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 359.2036437988281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 358.9518737792969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 359.3480224609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.8374328613281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 358.8504943847656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 359.2149658203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 358.9974365234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 359.0368347167969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 358.9211120605469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 359.2793273925781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 359.1149597167969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 359.074951171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 358.6256103515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 358.9075622558594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 358.7468566894531
############ Running episode number: 634  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 359.14019775390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 358.9333801269531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 359.11053466796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 359.28851318359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 358.941650390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 359.024658203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 358.81591796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 358.6839294433594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 358.75189208984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 358.83135986328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 358.9556579589844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 359.0151672363281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 359.01800537109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 359.36273193359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 358.76641845703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 358.8412780761719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 358.9502258300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 358.824951171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 359.00445556640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 359.2005615234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 359.0665588378906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 358.8645935058594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 358.8182067871094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 359.0467529296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 359.42669677734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 359.3442687988281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 358.862060546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 359.0546569824219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 358.9134521484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 358.76177978515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 359.0279541015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 358.7741394042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 358.8299255371094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 358.80377197265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 359.1725769042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 359.494384765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 359.291748046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.6797790527344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 359.0544128417969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 358.8713684082031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 359.2176818847656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 358.9024963378906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 359.2866516113281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 359.0624084472656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 359.1835021972656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 359.0325927734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 358.6421203613281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 359.4261779785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 358.86505126953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 359.0630798339844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 359.150146484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 358.8443603515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 358.9251403808594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 359.1717224121094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 358.784912109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 359.1694641113281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 359.0926208496094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 359.04180908203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 358.9844055175781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 358.6725158691406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 358.6575622558594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 358.6634826660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 359.2177429199219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 359.0284729003906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 359.4295654296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 358.965087890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 359.4393310546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 358.9837951660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 359.0983581542969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 358.9828796386719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 358.9167175292969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 358.86083984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 359.2364501953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 358.99456787109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 358.75640869140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 359.54864501953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 359.2332763671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 359.061279296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.8716125488281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 358.91278076171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 359.54052734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 358.93035888671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 358.99505615234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 359.03704833984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 358.78424072265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.7921142578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 358.978271484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.8714599609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.85870361328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 359.4064025878906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 359.1670837402344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 359.0987854003906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 358.90435791015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 358.9004211425781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 358.91644287109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 358.9725646972656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 359.3099670410156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 358.7821350097656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 358.8904113769531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 359.0155029296875
############ Running episode number: 635  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.84466552734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 358.746337890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 358.8388366699219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 359.04449462890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 359.20501708984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 359.28228759765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 359.1601257324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 358.89459228515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 359.0992431640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 358.8536682128906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 358.72198486328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 358.85455322265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 358.8718566894531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.7282409667969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 359.31683349609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 358.9581604003906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 358.92730712890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 359.0055847167969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 359.52960205078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 359.32891845703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 358.8515319824219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 358.83721923828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 359.0451354980469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 358.9550476074219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 358.8291320800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 359.360107421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 359.07611083984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 358.9905090332031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 359.47100830078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 358.9860534667969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 358.9654235839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 359.33197021484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 358.9571533203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 358.97540283203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 359.20599365234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 359.03936767578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 359.32684326171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.86529541015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 358.9966735839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 358.6965637207031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 359.0099792480469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 359.4778137207031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 359.05902099609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 358.9407653808594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 359.2893981933594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 359.048095703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 359.0468444824219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 358.95330810546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 358.93359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 358.8880920410156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 358.9374084472656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 359.41973876953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 359.046630859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 359.2142333984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 358.9481201171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 358.97869873046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 359.07879638671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 359.2243347167969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 359.36273193359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 359.225830078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 359.3424072265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 359.04693603515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 359.0489196777344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 358.93304443359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 359.1679382324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 359.00714111328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 359.33636474609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 358.8827209472656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 359.0542297363281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 358.84375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 359.0125427246094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 359.3092041015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 359.1858215332031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 358.8727111816406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 358.9800109863281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 358.6199035644531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 358.7838439941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 358.9979248046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.70013427734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 359.2434387207031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 358.616943359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 359.0325622558594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 359.4053039550781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 359.1465759277344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 358.9766845703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.79266357421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 358.8351745605469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 359.5973205566406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.8749694824219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 358.8897705078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 359.247802734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 359.29742431640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 359.38720703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 359.06689453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 358.9784851074219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 358.96240234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 358.72381591796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 359.40472412109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 359.2468566894531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 359.421875
############ Running episode number: 636  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 359.3578186035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 358.9672546386719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 358.8305969238281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 359.3197326660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 359.0570068359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 359.03216552734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 359.44647216796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 359.178955078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 359.22613525390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 359.0323181152344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 359.0380859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 358.8794860839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 358.9693603515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 359.2249755859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 359.015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 359.0942077636719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 358.9567565917969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 359.0621643066406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 358.7477111816406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 359.0623779296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 359.5694885253906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 359.1844482421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 359.0489501953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 359.2710266113281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 358.78643798828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 358.869140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 359.1031799316406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 358.8765869140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 358.90484619140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 359.067138671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 358.9577941894531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 359.0894470214844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 359.18206787109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 358.84136962890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 358.93365478515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 359.0193786621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 358.97613525390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 359.0090637207031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 358.87298583984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 359.0955810546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 358.9551696777344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 359.4670715332031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 358.87115478515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 358.93414306640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 359.0690612792969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 359.0592956542969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 358.7395324707031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 358.7580871582031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 358.72125244140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 358.90142822265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 358.74578857421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 359.0486755371094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 359.09478759765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 358.93609619140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 358.8899230957031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 359.2347412109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 359.6484069824219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 359.4574279785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 359.0828552246094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 358.98809814453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 358.8439636230469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 358.9418029785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 359.00567626953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 359.068359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 358.7260437011719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 358.90594482421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 358.8918151855469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 358.88916015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 358.9400939941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 358.9339904785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 359.2493896484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 358.9167785644531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 358.4708251953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 358.8831481933594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 359.2322692871094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 358.8807067871094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 358.9245910644531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 358.9635009765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.92596435546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 359.4221496582031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 359.08001708984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 359.29058837890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 358.9212646484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 358.9599914550781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 358.9108581542969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.7958068847656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 359.3448181152344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.8562316894531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.9897155761719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 358.94122314453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 358.8470153808594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 358.96337890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 359.29742431640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 358.7845458984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 358.9036865234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 359.1214599609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 359.0478515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 359.10791015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 359.0870666503906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 359.2554016113281
############ Running episode number: 637  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.9918212890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 359.0075988769531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 358.9070739746094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 358.874267578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 358.8600158691406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 359.0299377441406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 358.6746826171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 359.1796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 359.1719055175781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 359.307861328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 359.4389953613281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 358.8197021484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 358.8876037597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.8663635253906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 359.3801574707031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 358.9384765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 358.9170837402344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 358.99481201171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 359.4000244140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 358.8668212890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 358.9610900878906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 358.9894714355469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 358.84716796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 359.4147033691406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 358.8504943847656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 359.1695556640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 359.0961608886719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 359.06103515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 359.2503967285156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 358.8353576660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 358.9901123046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 358.9292907714844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 359.0823059082031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 358.9563293457031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 359.0696716308594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 358.9847106933594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 359.0299377441406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 359.4182434082031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 359.09637451171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 359.0869445800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 358.917724609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 358.8633728027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 359.15240478515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 358.8305969238281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 359.00177001953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 358.8700256347656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 358.8580322265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 358.7483215332031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 359.0035705566406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 358.9870910644531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 358.75567626953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 359.3055114746094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 359.01776123046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 358.8724060058594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 359.0413818359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 359.2512512207031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 359.2789611816406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 358.6835632324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 359.31976318359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 359.3874816894531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 359.02032470703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 358.8147888183594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 358.884765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 359.21417236328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 358.95782470703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 359.1621398925781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 359.0317687988281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 358.83197021484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 359.2391662597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 358.9320068359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 359.1807861328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 359.04656982421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 359.152099609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 358.93792724609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 358.990234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 358.9959716796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 359.05511474609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 359.01702880859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 359.0395812988281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 358.92694091796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 359.0361328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 358.9107360839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 358.9007263183594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 358.9062194824219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 358.8977966308594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.9831237792969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 358.8766784667969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 359.0180358886719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.90081787109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 358.7450866699219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 358.7887268066406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 358.8783264160156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 358.8868713378906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 358.95428466796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 359.28692626953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 358.7820129394531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 358.8173828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 359.0944519042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 359.05670166015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 359.3795471191406
############ Running episode number: 638  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.8528137207031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 359.7840576171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 358.9739685058594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 358.7575378417969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 359.1703796386719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 359.23162841796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 358.9015197753906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 358.8888854980469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 359.00860595703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 359.0342102050781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 358.93865966796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 358.9438781738281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 359.34136962890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.8348083496094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 359.2799377441406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 359.40625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 359.218505859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 358.8687744140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 358.9884338378906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 358.92877197265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 358.791015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 359.335205078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 358.85675048828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 359.1268005371094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 359.0346374511719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 359.0550537109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 358.7588195800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 358.82763671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 359.1482238769531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 359.0024719238281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 359.0579833984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 359.0076599121094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 359.0425109863281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 358.9888916015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 358.915771484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 358.872802734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 359.10040283203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 359.0036315917969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 359.0669250488281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 358.8263854980469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 359.3979187011719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 359.0491027832031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 359.0600280761719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 358.7905578613281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 359.36260986328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 359.2328186035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 359.537841796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 359.03643798828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 359.25067138671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 358.7716064453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 359.0106506347656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 359.0499267578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 358.91326904296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 358.758544921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 359.08929443359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 359.0626525878906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 359.01446533203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 358.8664245605469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 359.2018127441406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 358.9695739746094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 359.0514831542969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 359.2467956542969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 358.8105773925781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 358.7358703613281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 358.7794189453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 359.08770751953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 359.17510986328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 359.2083740234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 358.9104919433594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 358.916259765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 359.0093994140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 358.85467529296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 359.0311584472656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 358.9080810546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 358.9622497558594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 359.3515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 359.0850830078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 359.3035583496094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 359.01763916015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 358.7396240234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 358.9545593261719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 359.1475830078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 359.0147705078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 359.09393310546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 358.874755859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 359.15655517578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 359.115234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.77911376953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.9915466308594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 359.269287109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 359.165771484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 358.8150634765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 359.0456237792969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 359.0318298339844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 359.1424865722656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 359.14862060546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 359.3349304199219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 359.4195861816406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 358.9544677734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 358.9025573730469
############ Running episode number: 639  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 359.7750244140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 359.019775390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 358.9370422363281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 359.0653991699219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 358.89654541015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 358.8652038574219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 358.846923828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 358.97613525390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 358.82281494140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 358.8463134765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 358.8944091796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 358.89593505859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 359.12799072265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 359.5080871582031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 358.8790283203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 359.553466796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 359.1158447265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 358.9368896484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 358.9074401855469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 359.22491455078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 358.94830322265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 359.1269226074219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 359.0992431640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 358.8553466796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 358.8932189941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 358.9198913574219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 358.91351318359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 358.8666076660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 359.1424255371094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 358.7586975097656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 358.9656677246094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 358.91448974609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 358.83831787109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 358.76568603515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 359.0908203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 359.0738525390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 358.7691650390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 359.5332336425781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 359.1325378417969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 359.2063293457031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 359.03240966796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 359.24462890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 359.70477294921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 358.84619140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 359.3712463378906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 359.09466552734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 358.94390869140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 359.1197814941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 358.94659423828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 358.7362060546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 358.9916076660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 359.29217529296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 358.7820129394531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 358.9228515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 358.8322448730469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 359.0949401855469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 359.0540771484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 358.8363952636719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 359.2745056152344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 358.9014587402344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 358.6898498535156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 358.90875244140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 359.0363464355469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 358.6527404785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 358.7875671386719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 358.90252685546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 359.038818359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 359.0653381347656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 358.72003173828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 359.1186218261719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 358.863525390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 358.82989501953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 358.9091796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 359.2970275878906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 359.0484924316406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 359.0431213378906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 359.2738037109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 359.1773986816406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.99603271484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 359.3626708984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 358.8774108886719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 359.2209167480469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 358.8337707519531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 358.8351745605469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 359.0737609863281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.94805908203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 359.2889404296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.9464111328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.8394470214844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 358.7672424316406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 358.75653076171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 359.220947265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 359.2528076171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 358.9823913574219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 358.82586669921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 358.85174560546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 358.70611572265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 358.92474365234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 359.06243896484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 359.1535339355469
############ Running episode number: 640  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.8121643066406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 358.888427734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 358.8876037597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 359.07269287109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 358.9222412109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 358.78302001953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 359.24407958984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 358.8549499511719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 359.0511474609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 358.8038330078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 358.9811706542969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 358.82562255859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 358.8848571777344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 359.2508544921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 359.0590515136719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 358.74853515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 358.9884338378906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 358.92803955078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 358.7879943847656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 358.8377380371094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 358.98046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 358.9870910644531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 358.9087219238281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 358.93865966796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 359.03125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 359.3477478027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 359.4173889160156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 358.78204345703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 358.8054504394531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 359.2104187011719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 358.94781494140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 358.98614501953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 358.9005432128906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 358.8907470703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 358.9685974121094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 358.96319580078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 358.9897766113281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.734619140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 358.9170227050781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 358.92071533203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 358.8001708984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 359.04840087890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 358.9245300292969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 358.96295166015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 358.92724609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 358.9654541015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 359.0527038574219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 359.3489685058594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 359.323974609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 359.0013732910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 359.12640380859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 358.83245849609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 359.23114013671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 358.8653564453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 358.75201416015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 359.16845703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 358.66082763671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 358.7893371582031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 358.8748779296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 359.0303649902344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 359.4026184082031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 358.97650146484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 359.0405578613281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 359.04620361328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 358.96112060546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 359.0014343261719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 359.03271484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 359.37213134765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 359.1401672363281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 358.76312255859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 358.8554382324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 359.0446472167969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 358.79742431640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 359.3606872558594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 358.91802978515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 358.7156066894531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 358.7096252441406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 358.8232116699219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.9454040527344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 359.1429748535156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 359.0144958496094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 358.7550964355469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 358.99822998046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 358.8670654296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 358.839111328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 359.0433654785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 359.0915832519531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.97882080078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 359.1172180175781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 358.9709777832031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 358.9529724121094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 359.064697265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 358.74371337890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 359.05755615234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 358.9609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 359.12744140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 358.9827575683594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 358.98065185546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 359.01708984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 359.4005432128906
############ Running episode number: 641  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 359.1783447265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 359.10968017578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 358.9488525390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 358.8819885253906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 359.14288330078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 358.65545654296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 359.08013916015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 359.0389404296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 358.7499084472656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 358.90411376953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 358.803955078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 359.0123596191406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 358.9561767578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.9068603515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 358.7776184082031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 358.88482666015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 359.11224365234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 359.01904296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 359.1118469238281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 358.926025390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 358.94805908203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 359.0555419921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 359.0148620605469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 358.9680480957031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 358.8756408691406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 359.0531921386719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 358.73114013671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 358.9999694824219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 359.3106689453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 358.96319580078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 358.70697021484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 359.00848388671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 358.8943176269531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 358.94415283203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 358.78662109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 358.9452819824219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 359.05029296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 359.0024108886719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 358.9046936035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 358.90081787109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 358.8611755371094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 358.73883056640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 359.0312194824219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 358.7944030761719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 359.213623046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 358.7353515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 358.7673034667969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 358.9953918457031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 358.91668701171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 359.0030822753906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 358.73486328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 359.0188293457031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 358.9809875488281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 358.98626708984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 359.0200500488281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 358.8592529296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 359.0150451660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 359.00885009765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 358.9547119140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 358.8921813964844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 359.1878356933594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 358.8967590332031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 358.9773864746094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 359.005615234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 359.29052734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 359.0750427246094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 359.41375732421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 359.0308532714844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 358.92669677734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 358.9877014160156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 358.76904296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 358.9861145019531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 358.8506774902344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 359.5164794921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 358.98004150390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 358.99285888671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 358.9109802246094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 358.8322448730469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 359.0184631347656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 358.8537292480469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 358.5538024902344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 358.9356994628906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 358.7808532714844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 359.2322692871094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 359.41448974609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.9403991699219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 359.0450439453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.96795654296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 359.3762512207031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 359.2259216308594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 358.88519287109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 358.98455810546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 358.86309814453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 359.0393371582031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 359.027587890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 359.0534362792969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 358.9032287597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 358.96240234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 359.0173034667969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 358.918212890625
############ Running episode number: 642  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.9576110839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 359.0298156738281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 358.8692626953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 359.005615234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 359.24151611328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 358.9186096191406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 358.85296630859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 358.7073669433594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 359.5082092285156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 359.1251525878906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 359.0631408691406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 359.21649169921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 358.9132080078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.9697265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 358.8651428222656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 358.8675537109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 358.9017333984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 359.24761962890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 358.93389892578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 358.87042236328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 358.81390380859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 359.1389465332031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 358.9178466796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 358.9297790527344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 359.1429443359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 358.93927001953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 359.28729248046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 359.0012512207031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 358.7857666015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 358.9403076171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 359.0375671386719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 358.9338073730469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 358.8704833984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 358.87652587890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 358.8338623046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 359.07196044921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 358.9416198730469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.86065673828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 359.00244140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 359.0519104003906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 359.37744140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 358.99127197265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 359.1961364746094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 358.77667236328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 358.99139404296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 358.90484619140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 359.0528259277344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 358.87139892578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 358.9928894042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 358.9894104003906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 359.0293273925781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 359.0589599609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 359.08258056640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 359.0696716308594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 358.8860168457031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 358.990966796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 358.85162353515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 358.94598388671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 359.1446533203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 358.74786376953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 359.0821838378906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 358.9251403808594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 358.92864990234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 359.0508728027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 359.0224304199219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 359.0492248535156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 359.03131103515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 359.03741455078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 358.9169921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 359.4320068359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 358.96868896484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 358.8239440917969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 359.116943359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 358.8726501464844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 358.83184814453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 358.989501953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 359.0569152832031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 358.8830261230469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.932373046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 359.0555419921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 359.1038818359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 359.1247253417969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 358.83392333984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 359.0198974609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 358.9483947753906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.924560546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 358.8554992675781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.68450927734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 359.202880859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 358.8849182128906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 359.0320739746094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 359.1554260253906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 359.0965881347656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 358.64813232421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 358.9407043457031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 358.85296630859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 358.9260559082031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 358.9613037109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 358.9414367675781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 358.9742431640625
############ Running episode number: 643  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.9897766113281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 358.9745178222656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 359.0346984863281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 358.98956298828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 359.0999450683594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 358.83935546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 359.0624694824219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 359.0603942871094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 358.6974182128906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 358.99072265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 359.0992431640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 358.7987365722656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 358.8892517089844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 359.10260009765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 358.9388122558594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 358.8426818847656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 358.9668884277344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 359.0614318847656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 359.0174560546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 359.098388671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 358.7249450683594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 358.91802978515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 358.8786926269531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 359.04052734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 358.9372863769531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 359.0227966308594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 358.78265380859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 358.8913269042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 359.09136962890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 358.9338684082031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 358.9774475097656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 358.73486328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 359.0485534667969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 358.7441711425781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 358.9350891113281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 359.01177978515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 359.0042724609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.91571044921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 358.955322265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 358.72625732421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 358.9736328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 358.96856689453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 358.77447509765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 358.9705810546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 359.1510314941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 359.03515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 359.1587219238281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 358.7109680175781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 358.8844909667969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 358.99066162109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 358.96185302734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 358.9158020019531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 359.0981140136719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 359.03045654296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 359.1033630371094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 358.9228820800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 358.74627685546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 359.0575256347656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 358.9858703613281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 359.1095886230469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 359.1152648925781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 358.873291015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 358.8600158691406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 359.0540466308594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 359.1103515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 358.9868469238281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 358.9063720703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 358.9640197753906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 358.9039001464844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 358.92303466796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 358.9976806640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 358.7984619140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 358.55010986328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 358.8872375488281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 358.7549743652344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 359.0665283203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 359.1546936035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 359.0223693847656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 359.05316162109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 358.765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 358.7171630859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 358.89544677734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 359.0838928222656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 358.7861022949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 358.9294738769531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 359.0733337402344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 359.14495849609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.8137512207031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 359.0621643066406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 358.8715515136719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 358.81201171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 359.04156494140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 358.9748840332031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 358.9678955078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 358.6407165527344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 359.08831787109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 358.9891357421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 358.9136657714844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 358.9899597167969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 358.8555603027344
############ Running episode number: 644  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 359.2306213378906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 359.107666015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 359.2213134765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 359.11334228515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 359.04974365234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 358.8771667480469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 358.8425598144531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 359.063232421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 358.9615173339844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 358.98089599609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 358.8436279296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 358.9562683105469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 358.8771057128906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.8424377441406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 359.0638427734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 359.01416015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 358.78460693359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 358.9407043457031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 358.76739501953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 359.0114440917969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 359.0204162597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 358.93426513671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 358.95587158203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 359.0627746582031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 358.9118347167969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 359.0580749511719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 358.6390075683594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 358.70989990234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 359.1839904785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 359.04315185546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 359.0006103515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 359.0466003417969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 359.0025939941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 359.1952209472656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 358.8990478515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 359.0241394042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 358.9521179199219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.98040771484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 358.7596130371094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 358.73602294921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 359.07452392578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 359.08428955078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 358.8004455566406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 358.69610595703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 359.2362060546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 359.02239990234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 358.89453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 358.80438232421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 358.95953369140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 358.8485412597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 359.1154479980469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 359.1636657714844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 358.8835144042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 359.0963439941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 358.8997497558594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 359.2077941894531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 358.92340087890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 358.9422607421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 359.01373291015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 358.91546630859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 358.8006591796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 359.0230712890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 358.9239501953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 358.8947448730469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 359.1932067871094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 358.7687683105469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 358.9745178222656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 359.0528869628906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 359.0403747558594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 359.0653076171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 358.95330810546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 358.8836669921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 359.0560302734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 359.1125793457031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 359.1033935546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 359.16961669921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 359.0211486816406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 358.89825439453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.8579406738281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 358.9692687988281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 358.9466857910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 359.08966064453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 358.9513244628906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 358.9391174316406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 358.9703063964844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 359.0315856933594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 358.87579345703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 359.04779052734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 359.0641174316406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 358.9350891113281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 358.8052978515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 358.95782470703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 358.87200927734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 358.711669921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 359.05426025390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 359.0326232910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 358.7286071777344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 358.87542724609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 359.0301208496094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 358.8082580566406
############ Running episode number: 645  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.906494140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 359.0235290527344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 358.9502258300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 358.96075439453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 358.99530029296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 359.043701171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 358.963623046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 358.943359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 359.1528625488281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 358.867919921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 359.0487365722656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 358.96929931640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 358.85498046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.9287109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 359.2486267089844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 359.0927734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 358.888671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 359.030029296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 358.9461669921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 358.85345458984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 358.7724304199219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 358.8362731933594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 359.0157775878906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 358.9680480957031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 359.11444091796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 358.7437438964844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 358.9471435546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 359.0375671386719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 358.89825439453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 359.08685302734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 359.01416015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 358.9386291503906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 359.0074768066406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 358.79248046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 358.929443359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 359.10595703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 358.9670715332031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 359.1731262207031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 359.0167236328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 358.8837585449219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 358.9084777832031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 359.0101623535156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 359.1419677734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 358.98663330078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 359.0185241699219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 358.993896484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 358.9234924316406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 358.896240234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 359.0140380859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 358.9540710449219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 358.9673767089844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 359.1054382324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 358.8531799316406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 359.04437255859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 359.0696716308594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 358.9537658691406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 359.2839050292969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 359.1708984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 359.0819091796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 358.8865051269531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 358.9061584472656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 359.0837097167969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 359.327392578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 359.13116455078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 358.94451904296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 358.7442626953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 358.82110595703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 359.1357727050781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 358.9883728027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 359.058837890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 359.0515441894531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 358.902099609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 358.91766357421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 359.1126403808594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 359.07769775390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 358.95306396484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 359.12677001953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 358.8560485839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 359.21453857421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 358.97625732421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 359.0500793457031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 358.9047546386719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 358.7994384765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 358.7752380371094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 358.9417724609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 359.16693115234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 359.1149597167969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.87200927734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 359.1288146972656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 358.9180603027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 358.8886413574219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 358.9918212890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 358.91278076171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 358.95306396484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 359.0263977050781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 359.0567932128906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 358.9997253417969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 359.1239013671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 358.8703918457031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 359.1866149902344
############ Running episode number: 646  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.9262390136719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 359.1044006347656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 359.15411376953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 358.90008544921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 358.8431091308594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 359.14349365234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 358.94287109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 358.8599548339844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 359.0255126953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 359.1181335449219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 358.8564758300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 359.15472412109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 359.005126953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 359.0733337402344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 359.08148193359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 359.03033447265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 359.0174255371094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 359.0138244628906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 358.9419250488281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 358.87054443359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 358.8921203613281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 358.807373046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 358.8110046386719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 358.8956604003906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 358.851806640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 358.96075439453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 359.03912353515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 358.8820495605469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 359.1904296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 359.21240234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 359.07916259765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 358.7635803222656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 358.9268493652344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 358.6250915527344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 358.9307861328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 358.9676818847656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 358.9971008300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.80633544921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 358.8144226074219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 359.0313720703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 358.9305419921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 359.03045654296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 358.8485107421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 358.9717102050781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 358.94476318359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 358.9491882324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 359.1211853027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 358.92071533203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 358.8178405761719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 358.90594482421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 359.00750732421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 358.93670654296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 359.00640869140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 359.0474853515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 358.94024658203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 359.27825927734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 358.8807678222656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 358.8963623046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 358.94940185546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 358.90673828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 358.8361511230469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 358.8835144042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 359.0626525878906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 358.9760437011719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 358.78863525390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 358.77166748046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 358.7522277832031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 359.0629577636719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 358.8693542480469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 358.85443115234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 358.7737731933594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 359.1537780761719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 359.0169982910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 358.92449951171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 358.77679443359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 358.88018798828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 359.0445556640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 358.99139404296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.9682922363281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 358.9043884277344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 358.78875732421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 359.11846923828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 359.158203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 358.7591247558594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 358.79156494140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 359.03070068359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 359.1228942871094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.8166809082031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.9945068359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 359.05084228515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 358.9263000488281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 359.0435485839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 359.07501220703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 358.9075012207031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 358.8701171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 358.9835205078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 358.8888854980469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 358.8541564941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 358.84844970703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 358.82952880859375
############ Running episode number: 647  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.74957275390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 358.9421691894531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 358.9731750488281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 358.96942138671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 358.84259033203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 359.2010192871094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 359.10198974609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 358.8963623046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 359.07171630859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 358.7622985839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 359.03790283203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 358.91119384765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 358.9853210449219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.9510192871094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 359.15960693359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 358.91552734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 358.8209533691406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 358.838623046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 359.17694091796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 359.0624084472656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 358.8480224609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 359.08526611328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 359.0422668457031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 358.8887939453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 358.77935791015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 358.8047790527344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 358.9071960449219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 358.7431945800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 359.0603332519531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 358.9212951660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 358.8097229003906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 358.898193359375
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 1 20.0 909.642131904 (10.276491935146446, 10)
loss 358.98699951171875
Current State,action,reward,Response time,Next State:  (10, 10.276491935146446) 3 19.0 922.767593645 (10.236991269871366, 11)
loss 358.83441162109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 358.8580322265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 358.75054931640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 359.04241943359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.8040466308594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 358.88067626953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 358.9230041503906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 358.96533203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 359.1429748535156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 358.9739685058594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 358.860107421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 359.1924743652344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 358.7566223144531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 359.0346984863281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 358.79241943359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 359.3028259277344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 359.2081604003906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 359.0133056640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 359.1136474609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 358.9112854003906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 359.04815673828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 358.9051818847656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 358.8561096191406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 359.14556884765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 358.9631042480469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 358.97705078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 359.0096435546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 359.2510070800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 359.0222473144531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 359.0515441894531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 358.66925048828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 358.94537353515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 358.8232727050781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 358.8542785644531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 359.10284423828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 358.8621520996094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 358.57373046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 358.7247009277344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 358.8751220703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 358.84375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 358.7302551269531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 359.19598388671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 358.6996154785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 359.0472717285156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 359.1402893066406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.6737060546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 358.92425537109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 359.45849609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 359.1485290527344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 358.97088623046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 358.96209716796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 358.8990478515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.84259033203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 358.9775390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 359.3070373535156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.8679504394531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 359.1418762207031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 358.9963684082031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 358.9682922363281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 359.2435302734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 358.88201904296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 358.8220520019531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 358.9591369628906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 359.0423278808594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 358.88995361328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 358.9437561035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 358.9384460449219
############ Running episode number: 648  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 359.07843017578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 359.0228576660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 359.34710693359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 358.9749755859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 359.10394287109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 359.21063232421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 358.790771484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 358.7449951171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 359.1502380371094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 358.9530944824219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 358.8494567871094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 358.8724365234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 358.80902099609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.9438171386719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 359.03619384765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 359.01129150390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 359.3518981933594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 359.02801513671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 358.94342041015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 359.1934509277344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 359.3970642089844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 359.0914611816406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 358.9117736816406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 358.9634094238281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 358.8796691894531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 359.3257751464844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 358.9891052246094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 359.1635437011719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 359.0532531738281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 358.966552734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 359.04754638671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 358.745361328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 358.6908874511719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 359.41754150390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 358.91351318359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 358.90789794921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 358.9140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 359.03363037109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 358.948486328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 359.01617431640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 359.3440856933594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 358.944580078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 359.0527038574219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 358.7680969238281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 358.7027587890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 359.16387939453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 358.8191833496094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 358.8954162597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 359.02130126953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 359.2888488769531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 358.9721374511719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 358.7582092285156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 359.0003967285156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 359.0291442871094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 359.1381530761719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 359.0021057128906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 359.1227722167969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 359.3660583496094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 359.1375427246094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 358.9684753417969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 359.0369873046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 359.0516662597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 358.932861328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 358.7034606933594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 359.14117431640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 359.05010986328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 359.0673522949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 358.9359130859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 358.9073181152344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 359.0481872558594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 358.9057922363281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 359.0400695800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 358.9239807128906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 358.692138671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 358.8753356933594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 358.80047607421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 359.1673278808594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 359.0958251953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.8151550292969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 358.6872253417969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 358.96673583984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 358.8493957519531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 358.74798583984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 359.15460205078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 359.0548095703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.99169921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 358.9907531738281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.5982971191406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.91827392578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 359.08929443359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 359.06103515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 359.4786682128906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 358.8241271972656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 359.1228942871094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 359.04876708984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 359.6732482910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 358.9042053222656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 358.8791809082031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 358.9295959472656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 359.1959533691406
############ Running episode number: 649  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 359.33282470703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 358.705322265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 359.4426574707031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 359.0799560546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 359.122802734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 359.2535705566406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 358.93170166015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 358.855224609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 359.0445556640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 358.8262634277344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 358.9541320800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 359.26318359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 359.2178955078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.95697021484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 358.8822326660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 359.025634765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 358.9306945800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 358.8813781738281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 358.8536682128906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 359.30535888671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 359.0495910644531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 359.0673828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 358.86328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 358.8189697265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 358.80377197265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 358.8898010253906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 358.89727783203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 359.1330261230469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 359.0630187988281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 358.6796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 359.0443115234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 358.8793640136719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 358.87493896484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 358.8179931640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 359.01422119140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 358.9465026855469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 359.1806335449219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.87530517578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 359.0153503417969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 358.9278564453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 358.7087707519531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 358.9237060546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 358.93341064453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 358.85748291015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 359.13232421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 358.7406311035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 358.70794677734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 358.8824157714844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 358.6847229003906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 359.0000305175781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 358.9756164550781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 358.9247131347656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 358.7792663574219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 359.07696533203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 359.2746276855469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 358.7022705078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 358.67071533203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 359.32342529296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 359.1598205566406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 359.2789001464844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 359.18670654296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 358.69451904296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 359.0821228027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 359.0069274902344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 358.955810546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 358.98858642578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 358.7540283203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 359.1342468261719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 358.71514892578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 359.23931884765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 359.0240783691406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 358.8941955566406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 359.0086975097656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 358.7975158691406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 359.1157531738281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 358.7509460449219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 358.9353332519531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 358.81048583984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.87103271484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 358.9663391113281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 358.88934326171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 358.88946533203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 358.777587890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 358.80926513671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 358.9194641113281
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 1 20.0 1203.91462651 (15.829956988360925, 10)
loss 358.7705383300781
Current State,action,reward,Response time,Next State:  (10, 15.829956988360925) 3 19.0 1217.34610485 (15.892373986997768, 11)
loss 358.9681091308594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.75787353515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.9303894042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 358.65838623046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 358.9172058105469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 358.8668518066406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 359.2301330566406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 359.070068359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 359.2809143066406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 358.8937072753906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 358.9579162597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 358.8507995605469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 358.86199951171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 358.9200439453125
############ Running episode number: 650  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 359.1111145019531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 359.02301025390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 358.9863586425781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 359.0260925292969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 359.202392578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 358.9233093261719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 359.0094299316406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 358.9595642089844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 359.39599609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 358.9194030761719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 358.89398193359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 359.1878967285156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 359.4170227050781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 359.0548095703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 358.84051513671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 359.1938781738281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 358.784423828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 358.8325500488281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 359.0689697265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 358.8993225097656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 358.9085388183594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 359.0845031738281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 359.0426025390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 358.4647521972656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 359.04541015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 358.83984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 358.84271240234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 359.08209228515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 358.9261169433594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 358.79595947265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 359.1244201660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 359.00579833984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 358.8681945800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 359.0351257324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 359.1315612792969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 358.9483642578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 359.161376953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.9676513671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 359.09429931640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 358.66217041015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 359.0105895996094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 359.0710144042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 358.87542724609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 358.81622314453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 359.083740234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 359.11083984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 358.8824462890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 358.89910888671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 358.7928771972656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 358.871337890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 358.9726867675781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 359.1448974609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 359.1084289550781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 358.9888000488281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 358.8056335449219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 359.02703857421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 358.810302734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 359.2209777832031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 358.64501953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 359.1484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 358.8564453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 359.2186279296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 359.0624084472656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 358.8196105957031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 359.2240295410156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 358.91571044921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 358.9312438964844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 359.1134338378906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 359.12939453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 359.2184753417969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 359.0844421386719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 358.899169921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 358.8763122558594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 358.73406982421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 358.9990234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 358.85858154296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 359.328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 358.7244567871094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 359.01287841796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 359.0814208984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 358.9436340332031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 358.76458740234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 359.27655029296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 359.0290222167969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 358.967529296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 359.36724853515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 358.7737121582031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.96319580078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.8773193359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 359.1886901855469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 358.9532775878906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 358.9154052734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 359.035400390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 358.7800598144531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 358.9421691894531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 358.9267578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 359.18975830078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 359.3099670410156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 359.3873596191406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 359.044677734375
############ Running episode number: 651  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.8325500488281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 359.2976989746094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 358.9918518066406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 358.7876281738281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 358.9818420410156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 358.84356689453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 359.2033386230469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 358.6838073730469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 359.13360595703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 358.7652282714844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 359.078857421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 359.2240905761719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 358.8674621582031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 359.0035400390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 358.98345947265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 359.3812561035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 359.0113830566406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 359.01934814453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 359.3134460449219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 359.12811279296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 359.1383972167969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 359.1435546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 359.01910400390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 358.9179992675781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 358.99127197265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 358.9487609863281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 358.7375183105469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 358.8478698730469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 358.90289306640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 359.290283203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 359.0507507324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 358.8770446777344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 358.89825439453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 359.0052185058594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 358.92333984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 359.0579528808594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 358.7635192871094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.93310546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 358.91778564453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 358.879150390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 358.9079284667969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 358.9336242675781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 358.86370849609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 359.1112060546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 359.13275146484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 359.32611083984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 359.06341552734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 358.9024658203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 358.7767639160156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 358.9476013183594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 359.42864990234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 359.0724792480469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 358.6650085449219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 358.7627868652344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 359.1093444824219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 359.0984802246094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 358.9479675292969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 358.9764404296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 359.2705993652344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 359.29522705078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 358.8913879394531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 358.8940734863281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 359.24981689453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 359.11956787109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 358.906005859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 359.4055480957031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 358.8724365234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 359.21771240234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 358.78857421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 358.8678894042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 358.8367919921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 358.966552734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 358.97674560546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 358.8885498046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 358.8464050292969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 358.95941162109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 359.38531494140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 359.0196533203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 359.21307373046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 359.0085144042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 359.426513671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 358.80426025390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 359.0343322753906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 359.1009521484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 358.955322265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 359.1342468261719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 358.69866943359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 359.078857421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.9505920410156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 359.54144287109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 359.3943176269531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 359.1513671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 358.84698486328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 358.7951965332031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 359.4584045410156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 359.3609313964844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 358.7373046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 359.0816650390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 359.2989196777344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 358.9935302734375
############ Running episode number: 652  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.8453063964844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 359.3048400878906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 359.1331481933594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 358.87237548828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 358.83502197265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 358.89849853515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 358.7499084472656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 359.2108154296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 358.944580078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 358.8315734863281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 358.90020751953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 358.922119140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 359.4906921386719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 359.1481628417969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 358.6687927246094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 359.0148620605469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 359.0858154296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 358.8046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 359.2176818847656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 358.848388671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 359.4270324707031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 358.905029296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 358.8415222167969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 358.7964782714844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 358.9264831542969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 358.9660339355469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 358.9008483886719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 359.2060546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 358.9665832519531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 358.7820739746094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 359.32952880859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 358.9763488769531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 359.3983154296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 359.0702819824219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 359.25830078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 359.0968933105469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 358.729248046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.7995910644531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 359.12664794921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 358.9667663574219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 359.1492614746094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 358.79962158203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 359.0920104980469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 358.7245178222656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 358.90350341796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 358.9269714355469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 359.5812683105469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 358.7527160644531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 359.7294921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 359.04345703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 359.5013732910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 359.1164855957031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 358.9992370605469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 358.981689453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 358.97998046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 359.0485534667969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 359.0157165527344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 358.9563903808594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 359.0093078613281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 358.9798889160156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 359.3359069824219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 358.858642578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 359.09600830078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 359.20947265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 358.7293701171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 358.946533203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 358.8624572753906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 358.9158935546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 358.8966979980469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 358.71484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 359.0725402832031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 359.2496337890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 359.77252197265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 359.0991516113281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 358.9035339355469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 358.9896545410156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 358.878662109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 359.49560546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.9754638671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 359.0323486328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 358.9825439453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 358.8349609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 359.11859130859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 358.85028076171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 359.04132080078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 359.1568603515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 358.8045959472656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 359.1856384277344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 359.14385986328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 358.84228515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 358.88525390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 358.8244934082031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 359.46484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 359.1871337890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 358.8929443359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 358.9258728027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 359.015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 358.6270751953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 358.8425598144531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 358.75311279296875
############ Running episode number: 653  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 359.0013427734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 358.9548645019531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 359.0758361816406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 358.8795471191406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 359.0688781738281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 358.98095703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 358.9864196777344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 358.826904296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 358.8636169433594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 359.1680908203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 358.7855224609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 358.8519592285156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 358.85211181640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.8921813964844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 359.4188537597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 358.90838623046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 359.08258056640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 358.95745849609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 359.0179443359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 358.8235778808594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 359.11016845703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 358.8170166015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 358.90753173828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 359.02105712890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 359.5918884277344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 359.0047302246094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 359.6227111816406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 359.09149169921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 358.979736328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 358.90380859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 358.8098449707031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 359.11395263671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 358.85284423828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 359.078857421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 358.9976806640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 358.9064636230469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 359.0393981933594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 359.5019836425781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 358.9134216308594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 358.89910888671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 358.89410400390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 358.9937438964844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 359.21673583984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 358.7879943847656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 358.80908203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 359.4812316894531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 359.0061950683594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 359.449951171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 359.0340881347656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 358.8518371582031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 358.9006042480469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 359.0285339355469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 359.0460510253906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 359.0260009765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 359.0071105957031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 359.18505859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 359.0898742675781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 358.88330078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 359.00982666015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 358.7471923828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 359.0246276855469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 358.9330139160156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 359.3983154296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 359.25396728515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 359.1238708496094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 359.0143737792969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 359.03155517578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 359.32440185546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 358.9381408691406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 358.89019775390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 358.82159423828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 358.9738464355469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 359.0614929199219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 359.01116943359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 359.0859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 358.6902770996094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 358.7872009277344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 358.9410095214844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.9672546386719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 359.1693420410156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 359.0041809082031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 358.8760986328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 359.0798645019531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 358.92657470703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 359.015380859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.8029479980469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 359.0068359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 359.0770568847656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.9646911621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 358.98760986328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 358.8627014160156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 359.1112976074219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 358.81646728515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 359.07672119140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 359.09381103515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 358.81414794921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 358.98577880859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 359.3298645019531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 359.2733154296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 359.6781311035156
############ Running episode number: 654  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 359.07110595703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 359.2323913574219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 358.6568298339844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 358.9320373535156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 358.9175720214844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 358.81103515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 359.348876953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 359.0240783691406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 358.8491516113281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 358.9537048339844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 358.84368896484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 358.8399963378906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 359.01885986328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.9771728515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 358.8790588378906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 358.8165283203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 359.03564453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 358.9920654296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 359.4822082519531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 358.87701416015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 359.0960693359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 358.9678039550781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 358.79876708984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 359.2355651855469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 358.9942626953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 358.93212890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 358.9022216796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 358.7965087890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 359.5111999511719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 358.9058532714844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 359.01593017578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 358.81890869140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 359.06439208984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 358.9521179199219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 358.9827575683594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 359.24853515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 359.1161193847656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 359.1433410644531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 358.88873291015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 359.23944091796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 359.1868896484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 358.7265930175781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 358.90185546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 359.0914306640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 359.123291015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 358.9083251953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 359.0581359863281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 358.9356384277344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 359.0023498535156
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 2 19.0 992.681522335 (12.19918626616789, 11)
loss 359.0454406738281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 359.3378601074219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 359.197265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 358.9947814941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 358.8912048339844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 359.57757568359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 359.1261901855469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 358.98834228515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 358.89898681640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 359.07928466796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 359.04046630859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 359.2832946777344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 358.9832458496094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 359.261474609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 358.8846435546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 359.04913330078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 359.2974853515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 358.8750305175781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 359.00250244140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 359.3200988769531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 358.8724670410156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 359.3896179199219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 359.24609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 358.68878173828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 359.02557373046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 358.6109619140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 359.1443786621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 358.94036865234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 359.0466003417969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.760986328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 358.7431945800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 359.1110534667969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 358.9703063964844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 359.15240478515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 358.9032897949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 358.98077392578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.96484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 359.3177490234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 359.3204040527344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 359.06927490234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 358.8334045410156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 359.0040588378906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 358.88885498046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 358.8791198730469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 358.9053649902344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 359.0489807128906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 359.0272216796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 358.5276184082031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 358.6893615722656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 359.2866516113281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 358.7085266113281
############ Running episode number: 655  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.8937683105469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 358.8201599121094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 358.8892822265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 358.7941589355469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 359.4375915527344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 359.0797424316406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 359.0149230957031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 359.0379333496094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 358.9449768066406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 359.0386047363281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 359.0165710449219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 359.13946533203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 358.8813781738281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.93310546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 358.8886413574219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 359.0237121582031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 358.9455261230469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 359.2850341796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 358.8095397949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 358.7696838378906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 358.8039245605469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 358.90106201171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 358.9278259277344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 359.2991027832031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 359.0599670410156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 359.02667236328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 359.4699401855469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 359.0299072265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 358.94366455078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 358.95831298828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 358.8009948730469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 358.93255615234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 358.760498046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 358.94921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 358.9906311035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 359.3859558105469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 358.84375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 359.30731201171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 359.4256896972656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 359.11572265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 359.46075439453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 358.80950927734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 359.0630798339844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 358.9862060546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 358.90789794921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 359.2700500488281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 359.0360107421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 359.0016784667969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 359.0983581542969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 359.13958740234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 359.7108154296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 359.0431823730469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 359.23040771484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 359.1349182128906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 358.9696350097656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 359.39324951171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 358.7823181152344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 358.69476318359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 359.4205017089844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 358.7789306640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 359.02728271484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 358.7190856933594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 358.7826232910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 359.0301513671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 358.9217224121094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 359.01226806640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 358.616943359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 359.08380126953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 358.9200439453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 358.9641418457031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 359.0253601074219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 358.8650817871094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 359.18731689453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 358.8913269042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 358.93975830078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 358.89288330078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 359.0650634765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 359.0927429199219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 359.1976623535156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 358.9853820800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 358.9736022949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 358.81793212890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 358.8580017089844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 359.0391845703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 358.82501220703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.9986572265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 358.88519287109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 359.1317138671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 359.06280517578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 359.3647155761719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 358.99383544921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 359.0711669921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 359.91259765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 359.3675842285156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 359.71124267578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 358.88232421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 358.88311767578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 359.0282287597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 359.5368347167969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 359.0169982910156
############ Running episode number: 656  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 359.0815124511719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 359.0316467285156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 359.10748291015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 358.9168701171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 358.9705810546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 359.0960998535156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 359.0016784667969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 358.9122619628906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 358.9136047363281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 358.61810302734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 359.0033264160156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 358.826416015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 358.9273376464844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.8517761230469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 359.1201171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 358.8215637207031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 359.1110534667969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 359.2779235839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 358.94464111328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 358.974853515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 358.77093505859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 358.9368591308594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 358.9988708496094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 358.90704345703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 358.7835693359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 359.22406005859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 358.89337158203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 358.9437561035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 358.9067687988281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 359.2724304199219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 359.0332946777344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 359.3961181640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 359.0661315917969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 359.5144348144531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 359.156982421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 358.7294616699219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 359.0196838378906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.7247009277344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 359.3089599609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 358.89520263671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 359.0640869140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 358.9718933105469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 359.3397216796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 358.8928527832031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 358.890380859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 359.04925537109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 358.8678283691406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 358.81732177734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 359.388427734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 358.9998779296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 359.00787353515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 358.9864807128906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 358.9075012207031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 358.7641906738281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 358.9497375488281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 358.8184814453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 358.6683654785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 358.9523620605469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 358.97186279296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 358.8294982910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 359.0729064941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 358.8324279785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 359.1253356933594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 359.0297546386719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 358.8841552734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 359.3863830566406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 359.1431579589844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 359.2852783203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 359.0517578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 358.8586120605469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 358.7900695800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 359.0770263671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 359.3903503417969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 359.0126647949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 359.6412353515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 358.8903503417969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 359.1240234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 358.8733825683594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.75567626953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 359.1605224609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 358.9809265136719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 358.8498229980469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 359.1885681152344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 358.7577819824219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 358.8190612792969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.96551513671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 359.17327880859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.8287048339844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 359.01904296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 358.9679870605469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 358.587890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 359.10858154296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 359.2388610839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 359.0921325683594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 359.3953857421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 358.96923828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 359.1090393066406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 359.2129821777344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 358.95953369140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 359.17535400390625
############ Running episode number: 657  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 359.0733337402344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 359.2340087890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 358.8356628417969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 359.3968811035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 358.89996337890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 359.11956787109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 358.85931396484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 358.9617004394531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 358.7016906738281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 358.8807067871094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 359.04315185546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 359.0130920410156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 358.8684387207031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.9159240722656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 359.02032470703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 359.0830078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 359.0112609863281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 358.9286804199219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 359.1256408691406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 358.9332275390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 359.2189636230469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 358.9231262207031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 358.8344421386719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 358.8439636230469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 358.8411865234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 359.1264343261719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 359.20758056640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 359.2687072753906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 359.0643615722656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 358.9914855957031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 358.9355773925781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 359.0859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 359.5137939453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 358.8707275390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 359.13299560546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 359.0362243652344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 358.78326416015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.9334716796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 359.0820007324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 358.8183288574219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 358.83544921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 358.91156005859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 359.14251708984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 358.8909606933594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 358.7222595214844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 359.0179748535156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 358.93902587890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 358.908203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 359.0114440917969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 358.8896179199219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 359.0174865722656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 358.98040771484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 359.48260498046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 359.0026550292969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 358.6390686035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 358.8587646484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 359.0023193359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 359.0474548339844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 359.1899108886719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 359.2571105957031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 358.57952880859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 359.0955810546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 359.0174865722656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 359.16278076171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 358.8689880371094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 358.9256591796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 358.9674072265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 359.05072021484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 358.6530456542969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 359.0853576660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 359.0520324707031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 358.8804016113281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 358.9084167480469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 358.998291015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 358.7486572265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 358.8874816894531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 358.8564147949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 359.16815185546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 359.0792236328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 358.8737487792969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 359.12896728515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 358.8124084472656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 358.68707275390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 358.7511291503906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 358.97735595703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.8222961425781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 358.93304443359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 359.17303466796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 359.1861572265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 358.7215576171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 359.1982727050781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 359.25115966796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 359.1070861816406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 358.9912109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 358.7754821777344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 359.0001220703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 358.9342956542969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 359.03564453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 358.9358215332031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 358.947021484375
############ Running episode number: 658  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.759765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 358.8692321777344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 359.13214111328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 359.1275329589844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 358.9890441894531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 358.87445068359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 358.9554138183594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 358.93096923828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 359.1133117675781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 359.3739318847656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 358.8703918457031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 358.64788818359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 358.9627685546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 359.0375061035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 359.3143615722656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 358.77740478515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 358.82049560546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 358.90216064453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 358.98883056640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 358.8440246582031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 358.9260559082031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 358.9285888671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 358.7734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 358.99285888671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 359.09027099609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 359.06451416015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 358.8276062011719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 359.0566101074219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 358.8844909667969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 358.9634094238281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 358.8824462890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 358.95013427734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 358.7705383300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 359.08612060546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 358.85955810546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 358.8774719238281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 359.0580749511719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 359.0587463378906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 359.0469970703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 358.9676513671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 359.00250244140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 358.98907470703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 358.90673828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 359.0178527832031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 358.882080078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 359.1035461425781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 358.9018859863281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 358.80902099609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 359.03106689453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 358.8778381347656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 359.0660095214844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 358.959228515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 359.26495361328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 359.141845703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 358.72174072265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 359.18096923828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 359.0212707519531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 358.8298645019531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 358.8433532714844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 358.8052978515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 358.9778747558594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 359.24505615234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 359.3612060546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 358.9512023925781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 359.0101318359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 358.9800109863281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 359.0288391113281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 358.9230651855469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 358.96234130859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 358.93280029296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 358.9863586425781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 358.8654479980469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 359.11083984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 358.7181396484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 358.81097412109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 359.1304931640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 358.7413330078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 358.8450012207031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.9579772949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 359.2638854980469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 358.8584289550781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 358.9957275390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 358.89251708984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 359.1367492675781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 359.00897216796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.880126953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 358.98211669921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 359.15911865234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 359.0018615722656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 358.8584899902344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 359.1485900878906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 358.9726867675781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 359.1213684082031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 358.9898376464844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 358.8891296386719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 359.0227355957031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 358.9072265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 358.85260009765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 359.14044189453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 359.0501708984375
############ Running episode number: 659  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.8059997558594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 358.8864440917969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 359.0058288574219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 359.0555419921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 358.8041687011719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 358.9714050292969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 358.8161926269531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 358.9669189453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 359.3529968261719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 358.92437744140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 359.2411804199219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 358.84344482421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 359.0059814453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.95062255859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 359.1480407714844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 359.005859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 358.9059143066406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 358.9209289550781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 359.0263366699219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 358.8678283691406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 358.85479736328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 359.14019775390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 359.03045654296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 359.2991638183594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 358.9798583984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 359.0254211425781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 358.848388671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 358.98748779296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 359.05438232421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 359.35406494140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 359.2626647949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 359.0592346191406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 358.99652099609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 358.934326171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 358.9991149902344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 359.2219543457031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 358.868408203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 359.2018127441406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 359.0043640136719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 359.2760314941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 358.9219665527344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 359.06024169921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 359.3936767578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 359.07940673828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 358.9689636230469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 358.97174072265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 359.1210021972656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 358.7733154296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 358.90118408203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 358.9048156738281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 359.0430908203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 358.84222412109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 359.1524963378906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 358.83441162109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 358.96246337890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 359.0257263183594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 359.049560546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 359.1249084472656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 359.1054382324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 358.94140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 358.74749755859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 358.905517578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 358.76544189453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 358.7344970703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 358.90478515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 358.8583984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 358.77471923828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 359.1104431152344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 358.9202575683594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 358.9124450683594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 359.33795166015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 358.67706298828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 359.11578369140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 359.0276794433594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 358.9639892578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 358.926025390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 359.3836669921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 359.2801208496094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 359.07177734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 359.01190185546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 358.8291320800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 359.0710144042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 358.918212890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 358.73834228515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 359.0838928222656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.90386962890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 358.93853759765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.80810546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.9268798828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 358.935302734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 359.02142333984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 358.7967529296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 358.8885192871094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 358.9111633300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 359.0471496582031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 359.156494140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 358.7181091308594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 359.0115051269531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 358.79229736328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 358.99322509765625
############ Running episode number: 660  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 359.21942138671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 358.98309326171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 358.96923828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 359.0533447265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 358.9942626953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 359.0408630371094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 358.9347229003906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 359.21160888671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 358.93499755859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 358.9547424316406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 359.0735168457031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 358.9950866699219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 359.2454833984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 359.1640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 358.9596252441406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 358.9391784667969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 359.0710144042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 359.0535583496094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 358.785400390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 358.8299865722656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 359.11187744140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 359.1678161621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 358.9049987792969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 358.8604431152344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 359.1291198730469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 359.0403137207031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 359.08819580078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 359.005126953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 359.1106262207031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 359.0492858886719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 358.82550048828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 359.1358947753906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 359.0456237792969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 358.8178405761719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 358.88092041015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 358.9306945800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 359.0710754394531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 359.0338439941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 358.8824768066406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 358.8932189941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 359.2471008300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 358.9388732910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 358.96905517578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 358.90411376953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 359.0561828613281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 358.90264892578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 358.92474365234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 358.9475402832031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 358.959228515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 359.12908935546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 358.91754150390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 358.953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 358.77691650390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 358.8094482421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 358.92034912109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 359.036865234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 358.7468566894531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 358.9600524902344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 359.1504821777344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 359.0106201171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 358.95831298828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 359.04669189453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 358.6943054199219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 358.8716735839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 358.8921813964844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 358.78509521484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 359.1209716796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 359.17340087890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 358.8156433105469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 358.87213134765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 359.1760559082031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 358.69561767578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 358.9107666015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 358.8957824707031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 359.012939453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 359.2567138671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 359.11029052734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 359.22918701171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.8475341796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 358.91455078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 358.7392883300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 358.7669677734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 359.140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 358.8915100097656
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 4 19.0 1200.39231205 (15.817158911312735, 11)
loss 359.06512451171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.96868896484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 358.8116149902344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 359.0859680175781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.85870361328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 358.92657470703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 358.8526916503906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 359.0014953613281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 359.03118896484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 358.8675842285156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 359.0462951660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 359.0794677734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 359.1022644042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 359.0570068359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 359.1395263671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 359.0921630859375
############ Running episode number: 661  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.73876953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 359.055419921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 358.9052734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 359.14044189453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 358.9626770019531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 359.10687255859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 359.193603515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 358.9073181152344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 358.95294189453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 358.8866271972656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 359.14569091796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 358.9533386230469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 359.14593505859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 359.1379699707031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 358.8161315917969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 359.03778076171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 358.9341125488281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 358.9179992675781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 358.8835754394531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 358.829833984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 358.98419189453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 358.8353271484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 359.0633544921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 358.9658508300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 359.03411865234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 358.9529113769531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 358.94781494140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 359.0782165527344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 358.9830017089844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 359.0335693359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 358.9847106933594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 358.8539123535156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 358.71429443359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 359.0150451660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 359.1784362792969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 358.7369384765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 358.9588928222656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.70758056640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 358.7948303222656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 358.8475341796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 359.0794677734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 359.0303955078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 358.81292724609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 358.8935241699219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 358.7419738769531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 359.01873779296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 359.0650634765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 359.10357666015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 358.9302062988281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 358.83056640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 358.97491455078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 358.7427673339844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 358.8926086425781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 358.9520568847656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 358.9610595703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 358.7270202636719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 358.9505920410156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 359.2812194824219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 359.0452575683594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 358.97607421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 358.960693359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 358.9090270996094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 359.15264892578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 358.9288024902344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 358.8447570800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 359.00750732421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 359.15460205078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 358.918701171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 358.8445129394531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 359.0999450683594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 358.88037109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 358.9859619140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 359.0041809082031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 359.12091064453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 359.09661865234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 358.9659423828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 359.08099365234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 358.9063720703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.7010803222656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 358.9079895019531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 358.858154296875
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 1 20.0 1204.52470225 (15.550833128512703, 10)
loss 359.01904296875
Current State,action,reward,Response time,Next State:  (10, 15.550833128512703) 3 19.0 1202.54023315 (15.446694946204717, 11)
loss 358.9924621582031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 358.8459777832031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 359.4834289550781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 359.16448974609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 358.8940734863281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.92218017578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.8730773925781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 358.8558349609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 358.98529052734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 358.9914245605469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 358.856689453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 359.166259765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 359.2970886230469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 359.2745056152344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 359.0216064453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 359.03509521484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 359.281982421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 359.1920166015625
############ Running episode number: 662  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.86419677734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 358.9266052246094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 359.05596923828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 359.1841735839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 359.1351623535156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 359.0295104980469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 359.0150451660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 358.7301330566406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 358.9956970214844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 358.99658203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 358.8415222167969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 359.04315185546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 359.0184020996094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 359.4661865234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 358.8489685058594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 359.059814453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 358.92047119140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 358.98291015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 359.1329650878906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 358.9917907714844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 358.9181213378906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 359.0088195800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 358.8753662109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 358.88818359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 358.7486572265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 359.0451965332031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 359.2239685058594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 359.0340576171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 359.0437316894531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 359.02685546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 359.0538635253906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 358.8244934082031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 358.8705749511719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 359.2797546386719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 358.82080078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 358.7809753417969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 358.9023132324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.86029052734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 358.62896728515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 359.3583984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 358.9702453613281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 358.8655090332031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 358.9612731933594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 359.0155944824219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 358.8100280761719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 358.9557189941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 358.6843566894531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 358.749267578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 359.4499206542969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 358.7722473144531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 359.2117004394531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 358.802978515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 358.98046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 359.0107116699219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 358.956298828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 358.7888488769531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 359.12835693359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 358.92626953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 359.1529541015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 358.95330810546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 359.0159912109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 358.9924011230469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 359.1754150390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 358.78924560546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 358.837646484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 358.6808776855469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 359.164306640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 359.05279541015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 358.8619689941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 358.7854919433594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 359.2562255859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 358.891357421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 359.1289367675781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 358.86993408203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 359.0105285644531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 359.17596435546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 359.36407470703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 358.966552734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.8355407714844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 358.8242492675781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 359.0467529296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 358.9486999511719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 359.33062744140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 358.91949462890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 359.20001220703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.8067932128906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 358.8373107910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 359.1207580566406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 359.0091552734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 358.9288330078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 359.0866394042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 359.0714111328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 358.91827392578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 359.047119140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 358.9458923339844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 359.01031494140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 359.18536376953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 358.9677734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 359.06787109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 359.06805419921875
############ Running episode number: 663  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.9571533203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 358.7848205566406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 359.07391357421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 358.7833251953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 358.6558837890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 359.0109558105469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 358.96942138671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 358.9163513183594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 358.88946533203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 359.0740051269531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 359.0260009765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 359.00616455078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 358.89752197265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.9410095214844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 358.95379638671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 358.935302734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 359.01165771484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 358.7767639160156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 358.9552001953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 358.859130859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 359.10052490234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 359.03900146484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 359.32525634765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 358.9559020996094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 359.0887145996094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 359.0017395019531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 359.096923828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 359.3127136230469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 358.810302734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 359.0841979980469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 358.9515380859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 358.93670654296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 359.2671813964844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 358.7690734863281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 359.4221496582031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 358.9114685058594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 358.7583312988281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 359.0283203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 359.415771484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 358.96148681640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 358.98565673828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 358.9790954589844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 358.92840576171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 359.01580810546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 359.1872863769531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 359.0986633300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 358.9721984863281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 359.0462646484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 358.7878112792969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 359.23052978515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 358.9311828613281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 358.8647766113281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 358.9483337402344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 358.6774597167969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 358.94573974609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 358.9241638183594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 358.8538818359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 358.8077087402344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 359.081298828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 359.1112365722656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 358.9009704589844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 358.9419250488281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 358.9179992675781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 358.9367980957031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 359.0018005371094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 358.7840576171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 359.1019287109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 359.06005859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 359.102783203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 359.1416320800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 359.0242614746094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 358.9881896972656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 358.8690185546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 358.9381408691406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 358.9200134277344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 359.03533935546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 359.51397705078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 358.7535400390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 359.15399169921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 358.9388122558594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 359.0128173828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 358.9974670410156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 358.9130554199219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 358.9100036621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 359.0553283691406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.9643859863281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 358.93707275390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.9975891113281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.96966552734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 358.8852844238281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 358.8656005859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 359.0260925292969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 358.9757995605469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 358.8722229003906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 359.12164306640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 358.7536315917969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 358.92889404296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 359.35321044921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 358.7239074707031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 359.1285705566406
############ Running episode number: 664  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.9283142089844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 359.071533203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 359.2071533203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 358.8755187988281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 359.4466857910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 358.77374267578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 359.15234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 359.0047607421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 358.9198913574219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 358.8238220214844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 359.0733337402344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 358.967529296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 358.8653869628906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.7135925292969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 359.1170959472656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 359.1946105957031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 358.9479064941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 358.9615783691406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 358.7228088378906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 359.3902893066406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 358.8200988769531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 359.3336486816406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 358.76385498046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 359.0041198730469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 358.9920654296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 359.15252685546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 359.41583251953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 358.71636962890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 359.251953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 358.8191223144531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 358.8344421386719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 358.71728515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 358.95050048828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 359.08941650390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 359.50762939453125
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 0 21.0 909.008683798 (10.369891240151098, 9)
loss 359.11358642578125
Current State,action,reward,Response time,Next State:  (9, 10.369891240151098) 3 20.0 965.930171009 (10.316955310454549, 10)
loss 359.0676574707031
Current State,action,reward,Response time,Next State:  (10, 10.316955310454549) 3 19.0 924.913936648 (10.333617326102203, 11)
loss 358.9847717285156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 359.0151672363281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 358.74383544921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 359.15045166015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 358.7324523925781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 359.0936584472656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 359.15802001953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 359.1485290527344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 358.9600830078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 358.7673645019531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 359.08367919921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 359.1227722167969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 359.0235595703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 358.9375305175781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 359.4604797363281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 358.8966064453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 358.9941101074219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 358.9736633300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 358.9240417480469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 358.83465576171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 359.81976318359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 358.877197265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 358.9681091308594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 358.94989013671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 358.92138671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 359.37957763671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 359.1872863769531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 358.9879150390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 358.8800964355469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 359.6771240234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 359.11895751953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 358.9503173828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 358.9876403808594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 359.27093505859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 359.0018005371094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 359.0662841796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 358.8528137207031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 358.6466979980469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 358.9866943359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 358.8033752441406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 359.1466369628906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 359.00909423828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 359.28155517578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 358.89801025390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 358.81982421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 358.7487487792969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 358.9485168457031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 359.07073974609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.890380859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 358.9584045410156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 359.04266357421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 359.12152099609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 358.9874267578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 358.9140319824219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 359.2229309082031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 358.8855895996094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 359.1248474121094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 359.85675048828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 358.8775634765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 358.7729187011719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 358.9727783203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 358.9798278808594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 358.8905944824219
############ Running episode number: 665  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.91900634765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 359.0169677734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 358.90264892578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 359.4439697265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 358.9663391113281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 359.3041687011719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 359.1867980957031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 359.4688720703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 358.8143005371094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 358.8854675292969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 359.36004638671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 358.9976501464844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 359.1114501953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 359.47119140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 358.9842529296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 359.1201171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 358.81658935546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 358.9136047363281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 359.03875732421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 359.0397644042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 358.99237060546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 359.0423278808594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 358.7397155761719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 358.8821105957031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 358.7174377441406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 359.2322082519531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 358.83929443359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 359.5243225097656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 358.9644775390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 359.10028076171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 359.2892150878906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 358.96490478515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 358.911865234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 358.62994384765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 358.9024963378906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 359.0122985839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 358.916015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.5977478027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 358.9136962890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 359.07177734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 358.9654541015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 358.794189453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 359.5281677246094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 358.90582275390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 358.7378845214844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 359.1244812011719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 359.29949951171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 358.84100341796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 359.136474609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 359.2033996582031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 358.90142822265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 358.8749084472656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 359.3061218261719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 358.960205078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 358.9310607910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 359.0788879394531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 359.2610778808594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 358.89739990234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 359.3267517089844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 359.1423034667969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 358.9261169433594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 358.960693359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 359.6886901855469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 358.987548828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 359.89105224609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 358.9757385253906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 358.794677734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 358.9427185058594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 358.91436767578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 359.991455078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 359.1397705078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 359.2666320800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 359.0329895019531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 359.2056579589844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 358.8070373535156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 358.8592834472656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 359.0623779296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 358.80206298828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.8373107910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 359.3921203613281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 358.9320373535156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 358.80133056640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 359.2436218261719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 359.4479064941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 358.9726257324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.99371337890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 358.9374084472656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.9388427734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.8497314453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 358.94476318359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 358.9489440917969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 358.9452819824219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 358.8345642089844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 359.9039611816406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 358.5130310058594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 359.0336608886719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 359.0790100097656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 359.2306823730469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 358.9658203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 359.2020568847656
############ Running episode number: 666  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 359.04510498046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 359.0565185546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 359.21258544921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 358.78985595703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 358.82208251953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 359.7958068847656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 358.92181396484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 358.9399108886719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 359.0341491699219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 359.0713806152344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 358.820068359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 359.91021728515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 358.7920837402344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 359.8986511230469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 359.129150390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 359.0756530761719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 359.0955505371094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 358.8634948730469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 359.0132751464844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 358.8277282714844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 359.01092529296875
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 4 19.0 919.871906942 (10.370942817486826, 11)
loss 358.86083984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 358.9090881347656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 358.91839599609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 358.8516845703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 359.2757568359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 359.087158203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 359.39306640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 358.9049072265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 358.90191650390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 358.9322814941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 359.29010009765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 359.1981506347656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 358.86004638671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 358.87408447265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 359.1357421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 358.6579895019531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.7508239746094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 359.05853271484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 359.0807189941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 359.5045471191406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 359.015380859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 358.87408447265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 358.9548645019531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 359.2335205078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 358.64947509765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 359.0674133300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 358.9277038574219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 359.1405334472656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 358.8854064941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 359.1339416503906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 359.5442199707031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 359.031982421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 359.00726318359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 358.8345947265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 358.8050231933594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 358.84967041015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 359.03045654296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 359.0343933105469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 359.02685546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 358.7629699707031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 359.3330078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 359.1706237792969
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 2 19.0 1339.64749699 (18.671267839956315, 11)
loss 359.2891845703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 359.0355224609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 358.9497375488281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 358.87786865234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 358.9143981933594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 359.0638427734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 358.8417663574219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 358.87591552734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 359.0513610839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 359.280517578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 359.16845703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 359.0626220703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 358.7632141113281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 358.8655700683594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 359.1330261230469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 359.7743225097656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 359.0879821777344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 358.9912414550781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 359.2762145996094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 359.69781494140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 358.9866638183594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 358.7832336425781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.8166809082031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 358.9947509765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 359.03851318359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 359.9085388183594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 358.7964172363281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 359.07098388671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 359.1650390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 359.07568359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 359.4725036621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 359.0313720703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 358.9139099121094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 359.4798583984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 359.2786865234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 358.88677978515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 359.05902099609375
############ Running episode number: 667  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.9861145019531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 359.1980895996094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 359.02252197265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 359.3678894042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 359.9607849121094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 358.78387451171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 359.00018310546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 359.03240966796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 359.6374206542969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 359.0672302246094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 358.9464416503906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 358.8882751464844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 358.83807373046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.96881103515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 358.8720397949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 358.8572082519531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 358.9739074707031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 358.7393493652344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 358.7904357910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 359.33447265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 358.7071533203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 358.9645690917969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 359.2001953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 359.0277404785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 358.92041015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 359.0078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 358.9385986328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 359.6169738769531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 359.01531982421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 358.8238220214844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 359.15264892578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 358.71435546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 359.0769348144531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 359.15155029296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 359.6893615722656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 359.1144714355469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 358.98114013671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.95556640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 359.9211120605469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 358.9925842285156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 359.04583740234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 358.8342590332031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 359.05120849609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 358.9751281738281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 358.8904724121094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 358.8562316894531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 359.09234619140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 359.1588439941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 359.00543212890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 359.3471374511719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 358.87664794921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 358.97869873046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 359.71356201171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 358.79925537109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 358.8489685058594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 359.0417785644531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 359.03369140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 359.8775634765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 359.03338623046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 359.67718505859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 359.13421630859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 358.9425354003906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 359.08428955078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 358.8052062988281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 358.8696594238281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 359.0643310546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 359.1330261230469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 358.78900146484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 358.9237976074219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 358.8755187988281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 359.1036071777344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 359.80279541015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 359.73944091796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 358.8832702636719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 359.80548095703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 359.0338134765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 359.7680358886719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 359.7525329589844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 359.0731201171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 359.8888854980469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 358.8990173339844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 359.7893371582031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 358.8783874511719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 358.9411315917969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 358.919921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.9225769042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 358.83538818359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.8155822753906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 359.2049560546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 359.6572570800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 359.0237121582031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 358.9031677246094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 360.3096923828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 359.27423095703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 358.95440673828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 359.50091552734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 358.97991943359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 358.9517822265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 359.8508605957031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 359.5226135253906
############ Running episode number: 668  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.87774658203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 359.9622802734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 359.6377868652344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 358.8751220703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 359.1040954589844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 358.9137268066406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 358.67169189453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 358.9146423339844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 358.75921630859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 358.9347229003906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 358.7539978027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 359.2042236328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 359.1827697753906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.7698669433594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 359.0788879394531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 359.187744140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 358.9687805175781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 358.9132385253906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 359.5924072265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 358.8792419433594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 359.0573425292969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 358.867431640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 358.9651184082031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 359.1631774902344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 358.9303894042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 359.8115539550781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 358.96551513671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 359.01214599609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 359.02789306640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 358.9383544921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 358.97430419921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 358.89105224609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 358.82537841796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 358.8733215332031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 359.7912902832031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 359.0375061035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 359.0612487792969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.82611083984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 359.1958312988281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 358.803466796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 359.71807861328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 358.9827880859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 358.7685852050781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 358.6809387207031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 359.3628845214844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 358.8019104003906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 359.111328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 359.82757568359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 359.3213195800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 359.1471252441406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 358.79010009765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 359.3795166015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 358.96820068359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 358.89373779296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 358.91900634765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 359.0516662597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 358.9709167480469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 359.0240783691406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 358.82073974609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 358.95989990234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 358.9886779785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 358.8460998535156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 359.1210632324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 358.86224365234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 360.181640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 358.9158935546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 358.9789123535156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 359.0484313964844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 358.8994445800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 359.0180969238281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 359.1769714355469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 359.29400634765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 358.88507080078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 358.9026794433594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 358.8651428222656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 359.0136413574219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 359.1045227050781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 359.0498046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 359.0075988769531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 358.7112121582031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 359.03619384765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 359.0286865234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 359.75811767578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 359.0590515136719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 358.98846435546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.990966796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 359.3026123046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 359.272705078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 359.98406982421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 359.1071472167969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 359.50140380859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 358.9689025878906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 358.997314453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 359.405517578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 359.44720458984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 359.1269836425781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 359.4181213378906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 359.0771484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 359.150146484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 358.779052734375
############ Running episode number: 669  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 359.76824951171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 359.1162109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 359.0147399902344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 358.8868103027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 359.0395812988281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 358.9075012207031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 358.90594482421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 359.1510925292969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 358.9637145996094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 358.926513671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 359.1939697265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 359.79229736328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 358.974609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.8406677246094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 358.917236328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 359.8632507324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 359.2021484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 358.8655090332031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 359.0064392089844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 359.027099609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 358.9267883300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 359.534423828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 358.9505615234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 359.3816833496094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 359.0382080078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 359.17486572265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 358.9497985839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 358.823974609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 358.9796142578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 358.9474792480469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 359.10089111328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 358.9048767089844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 359.8443298339844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 358.88067626953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 359.7198486328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 359.02593994140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 358.8919372558594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 359.24530029296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 358.76385498046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 359.93115234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 359.305419921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 359.12890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 359.62054443359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 358.7755432128906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 358.99176025390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 359.7066650390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 359.0085144042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 358.8539733886719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 358.9963073730469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 359.6174011230469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 359.1142883300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 358.884033203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 359.13299560546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 358.7531433105469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 359.2298278808594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 358.828369140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 358.8893737792969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 358.94610595703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 358.8979187011719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 359.0290222167969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 359.1966552734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 358.9141540527344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 359.1603698730469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 358.7735900878906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 359.0246276855469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 359.0710754394531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 359.1921081542969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 359.167236328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 359.13873291015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 359.2315368652344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 358.94293212890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 359.2935791015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 359.278564453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 359.0797424316406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 358.9068298339844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 359.7832336425781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 359.705322265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 359.08709716796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.91741943359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 358.7702331542969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 358.95501708984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 358.89422607421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 359.41705322265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 359.02520751953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 358.9697570800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 359.2820739746094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 359.25433349609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 359.85247802734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.7071838378906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 359.59954833984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 359.4513244628906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 358.93096923828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 358.8830871582031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 358.8611145019531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 359.369873046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 358.5995178222656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 358.8873291015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 359.08575439453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 358.9938659667969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 359.0216979980469
############ Running episode number: 670  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.9140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 358.8814697265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 359.0279846191406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 358.7452087402344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 358.91064453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 359.8898010253906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 359.04791259765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 358.89581298828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 359.1943054199219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 359.06390380859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 358.9682312011719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 358.9989929199219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 359.8310241699219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 359.37872314453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 359.0313720703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 358.67724609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 359.222412109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 358.98651123046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 358.9115905761719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 358.9276428222656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 359.00933837890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 359.06646728515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 359.38330078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 358.900634765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 359.2974853515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 359.2205810546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 359.04937744140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 359.19830322265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 359.02056884765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 358.6880187988281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 358.8493957519531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 359.70208740234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 360.2262268066406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 358.9468994140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 359.029541015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 358.9289855957031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 359.134765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 359.13427734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 359.1067810058594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 358.772705078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 359.7528991699219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 359.6297607421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 358.93829345703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 359.1044921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 358.923095703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 359.0202331542969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 358.7213134765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 359.8925476074219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 358.7317810058594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 359.0213317871094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 359.3374938964844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 358.9070129394531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 359.6055603027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 358.7419128417969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 358.9709777832031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 359.23199462890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 359.8588562011719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 358.9155578613281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 358.8147277832031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 358.6538391113281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 359.0780334472656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 359.0048828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 358.7708740234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 360.1603698730469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 359.1238098144531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 358.8970947265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 359.0737609863281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 359.8150634765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 359.3856201171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 359.0616760253906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 358.8931884765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 359.3500061035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 359.0227355957031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 359.0026550292969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 358.943359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 358.9889831542969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 359.1915588378906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 359.0296325683594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.843017578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 358.9259338378906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 359.154541015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 359.89581298828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 358.9201354980469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 358.8988037109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 359.0943298339844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 360.2234802246094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 359.1886291503906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.8127746582031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.9981384277344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 359.2948303222656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 358.9028625488281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 359.03045654296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 359.24658203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 358.6799621582031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 358.7987060546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 358.8509521484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 359.6405944824219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 359.0467224121094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 359.3819274902344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 358.99896240234375
############ Running episode number: 671  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 359.04437255859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 358.93988037109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 359.0033874511719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 358.9364013671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 359.4083557128906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 358.9747314453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 359.178466796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 359.0319519042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 358.9136657714844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 358.8913879394531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 359.96331787109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 358.9645080566406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 358.8645324707031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.82550048828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 359.786376953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 359.339599609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 358.98016357421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 359.1519775390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 358.9054870605469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 359.87530517578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 358.89459228515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 358.9556884765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 359.097412109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 358.77276611328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 359.0084533691406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 359.77239990234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 358.73291015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 359.0167236328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 358.79168701171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 358.90069580078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 359.34503173828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 358.8565979003906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 359.0201416015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 358.90234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 358.8225402832031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 358.8734436035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 358.8218688964844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.8722839355469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 358.8587341308594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 359.1042785644531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 358.87652587890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 360.2173767089844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 359.5903625488281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 359.1300964355469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 359.19207763671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 359.12176513671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 358.9011535644531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 358.6665344238281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 359.12738037109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 358.8853759765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 359.0269775390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 359.1134338378906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 359.4980773925781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 359.113525390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 358.8153991699219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 358.77227783203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 358.8253479003906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 358.75958251953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 359.0430603027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 358.84442138671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 358.7236633300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 359.9413146972656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 359.40155029296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 358.99920654296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 358.7435302734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 358.7853088378906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 359.1378173828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 358.8361511230469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 358.97564697265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 359.0748596191406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 359.6307067871094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 359.0609436035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 358.7679138183594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 359.1972351074219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 358.7076721191406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 358.8133544921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 359.16094970703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 359.1217956542969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.9761657714844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 359.8454895019531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 359.06475830078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 360.5749816894531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 358.8568115234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 359.1528625488281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 359.01959228515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.9200134277344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 359.0710144042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 359.06829833984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.5746765136719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 358.9952392578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 359.379638671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 359.3460693359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 359.7051696777344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 358.93927001953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 358.9166259765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 359.06561279296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 358.84320068359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 359.1877136230469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 359.0492858886719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 358.9076232910156
############ Running episode number: 672  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.9317321777344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 359.1375732421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 358.932373046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 359.29705810546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 359.14788818359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 358.80145263671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 358.79254150390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 359.3280334472656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 358.951904296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 359.8135986328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 359.072021484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 358.7481384277344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 359.4676208496094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.9602355957031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 359.1856994628906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 358.983642578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 358.8031005859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 359.1495666503906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 358.7779846191406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 359.9174499511719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 359.0026550292969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 359.25634765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 358.9963684082031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 359.0130615234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 358.9236755371094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 359.026123046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 358.83099365234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 358.7724304199219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 358.8302307128906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 360.0563659667969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 358.8076477050781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 360.1466064453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 359.7240295410156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 358.97723388671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 359.8009948730469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 359.2680969238281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 359.1488037109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.8536682128906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 359.0162048339844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 359.21368408203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 359.07269287109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 359.3563537597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 359.99542236328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 358.8855285644531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 358.9223327636719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 358.9273681640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 359.6867370605469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 359.1073303222656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 359.23968505859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 358.9468994140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 359.30474853515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 359.06719970703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 359.055908203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 358.9339294433594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 359.10772705078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 358.867431640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 359.212158203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 359.0103759765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 358.9318542480469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 359.09417724609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 358.7629089355469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 359.0391845703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 358.97576904296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 359.6363220214844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 358.9934997558594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 359.23394775390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 359.08905029296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 358.929443359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 358.8969421386719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 358.88958740234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 359.16387939453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 358.9882507324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 359.7707824707031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 358.64141845703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 358.6532287597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 359.01177978515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 358.8786315917969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 358.8940124511719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 359.6679992675781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 359.06689453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 358.852783203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 358.901123046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 358.86370849609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 358.897705078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 359.1894226074219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.83221435546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 358.89581298828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.9906921386719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 359.2704772949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 358.8387145996094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 359.1492614746094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 358.81805419921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 359.0594177246094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 358.94293212890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 359.37054443359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 358.85675048828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 359.99871826171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 359.0645446777344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 359.8492431640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 358.8656921386719
############ Running episode number: 673  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.68463134765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 359.0732116699219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 358.92767333984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 358.7660827636719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 359.0263977050781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 358.9101257324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 359.04034423828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 359.15386962890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 360.2900390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 359.205810546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 358.9339294433594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 359.11480712890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 358.84405517578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.7783203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 359.0024719238281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 358.9936218261719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 359.0179138183594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 358.8917541503906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 359.0306701660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 358.70843505859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 358.9833984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 358.7595520019531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 358.8687744140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 358.831298828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 359.0497741699219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 359.6850891113281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 358.99908447265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 358.872802734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 359.09136962890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 359.255859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 359.0266418457031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 358.8218994140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 359.0040588378906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 358.9810791015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 359.05511474609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 358.9754638671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 360.2339782714844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.9523010253906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 358.8930969238281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 358.9103088378906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 358.803955078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 358.9671630859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 358.7819519042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 358.90985107421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 358.9504089355469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 359.10443115234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 358.94793701171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 359.6595153808594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 358.838134765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 358.9432678222656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 358.75848388671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 359.1133728027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 359.01898193359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 359.2948913574219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 358.765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 358.9405822753906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 359.0260009765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 358.930908203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 359.1014404296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 358.95257568359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 358.98431396484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 358.7294006347656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 359.0552978515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 358.94036865234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 358.91241455078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 358.96380615234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 359.31390380859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 359.01800537109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 359.2891540527344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 359.0354309082031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 358.9987487792969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 358.7246398925781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 358.8475646972656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 358.7596740722656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 359.0585021972656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 358.83892822265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 358.9278259277344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 358.9942932128906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 359.09381103515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 358.83282470703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 358.8594970703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 359.015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 359.2187194824219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 359.128173828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 358.9230651855469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.9159851074219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 358.8177490234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.8222961425781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.8533020019531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 358.9881286621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 358.7456359863281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 358.9490966796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 358.8059387207031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 358.97357177734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 358.9516296386719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 359.1177062988281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 358.9488220214844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 359.1972351074219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 359.02099609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 358.8829345703125
############ Running episode number: 674  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.9807434082031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 359.0915832519531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 358.8924865722656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 359.004638671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 359.1368408203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 359.0923767089844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 359.90826416015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 358.9112548828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 358.80316162109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 358.9433898925781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 359.07568359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 358.96746826171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 358.90380859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 359.59722900390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 359.1040344238281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 359.0429382324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 358.9437255859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 359.07611083984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 358.9801330566406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 358.99078369140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 358.9495849609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 358.9251708984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 359.1783752441406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 358.8796691894531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 359.287353515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 358.7567138671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 358.7998352050781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 358.9990539550781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 358.81060791015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 358.7490234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 358.9612121582031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 358.8633728027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 359.7742614746094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 358.989013671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 358.98931884765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 359.3971862792969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 359.0690002441406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 359.09869384765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 358.9386901855469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 358.9901428222656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 359.0526428222656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 358.8251647949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 358.98638916015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 359.0645751953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 359.14825439453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 359.00115966796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 358.79119873046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 358.95721435546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 358.98321533203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 358.89898681640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 358.9913635253906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 359.0286865234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 358.92987060546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 358.9357604980469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 358.8355407714844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 358.89178466796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 359.12774658203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 358.94024658203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 359.00140380859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 358.94818115234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 358.9767761230469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 358.92364501953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 358.7731628417969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 358.9073181152344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 358.8489990234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 358.9190979003906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 359.06915283203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 358.9839172363281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 358.89739990234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 358.7878112792969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 359.1695861816406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 358.9182434082031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 358.854736328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 358.87310791015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 358.88421630859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 358.8821105957031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 358.7450256347656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 359.1849670410156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.8782653808594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 358.97576904296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 358.98394775390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 358.9273376464844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 359.1459655761719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 359.0224914550781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 358.9375915527344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.4959411621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 359.0768127441406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.6847229003906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 359.03070068359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 358.7778015136719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 358.7657470703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 359.08355712890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 358.81976318359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 359.03460693359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 358.9821472167969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 359.2938537597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 359.0154724121094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 359.0313720703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 359.0278625488281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 358.9861755371094
############ Running episode number: 675  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.87445068359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 359.0928649902344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 358.9857482910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 358.7793273925781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 358.9094543457031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 359.0162353515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 358.6710205078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 359.0413513183594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 359.03936767578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 358.9681396484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 358.9292907714844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 358.92669677734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 358.90875244140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.7903747558594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 359.18426513671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 358.8748474121094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 358.8268127441406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 358.91778564453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 359.0594177246094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 359.0294189453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 359.20391845703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 358.831787109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 359.2034912109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 358.99420166015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 358.69512939453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 358.9491882324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 359.1419372558594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 358.6342468261719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 358.8658447265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 359.11419677734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 358.9871826171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 358.8946228027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 358.87432861328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 358.9963684082031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 358.9898986816406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 358.9273681640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 358.8293762207031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.7121887207031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 358.9428405761719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 358.8615417480469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 358.7623291015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 359.0846252441406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 359.1240234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 358.96893310546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 358.8246765136719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 359.04010009765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 359.082275390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 358.880859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 359.0870361328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 359.0467529296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 359.0199279785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 358.9898376464844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 359.0947265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 359.04583740234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 358.92828369140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 358.8619079589844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 359.08013916015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 359.0598449707031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 358.90350341796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 358.62103271484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 358.6587219238281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 358.886962890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 358.8961486816406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 358.9353332519531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 359.1166076660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 358.6368103027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 358.8660888671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 359.1676940917969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 358.7824401855469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 359.0565185546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 358.6560363769531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 358.8450622558594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 359.2082824707031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 358.9451599121094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 359.0124206542969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 358.82666015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 358.9425048828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 359.0954284667969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 359.11419677734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 359.2419738769531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 358.96795654296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 359.23065185546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 358.92364501953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 358.9350280761719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 358.8987731933594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 359.0588684082031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 359.0445251464844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.8045654296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 359.049072265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 358.98785400390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 358.91680908203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 358.91058349609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 358.8987121582031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 358.9548645019531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 358.9039306640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 358.9253234863281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 359.2167663574219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 358.8701171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 359.0290222167969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 358.85858154296875
############ Running episode number: 676  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.91180419921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 359.0467224121094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 358.982177734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 359.0233459472656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 359.0266418457031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 358.8514709472656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 359.0073547363281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 358.95538330078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 359.0255432128906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 358.6705017089844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 358.9972839355469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 359.016357421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 358.8958435058594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.9580383300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 358.7801818847656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 358.861083984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 359.09051513671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 358.8893737792969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 358.9656066894531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 358.7635803222656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 358.9141845703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 359.1069641113281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 358.9458312988281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 358.6239318847656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 358.8152770996094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 358.95928955078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 359.1502380371094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 359.0433654785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 358.99395751953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 358.787109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 359.0122985839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 358.9263916015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 359.0598449707031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 358.98187255859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 358.85693359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 358.89794921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 359.0211181640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.6875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 358.97100830078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 358.8612365722656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 358.98809814453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 358.9431457519531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 359.2549743652344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 358.8904724121094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 358.9879150390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 359.0683898925781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 359.05609130859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 358.8561706542969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 358.7700500488281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 359.169921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 359.13262939453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 358.9970397949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 358.701904296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 358.9198303222656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 358.983642578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 358.8994445800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 359.3263854980469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 358.86663818359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 359.04681396484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 358.9107666015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 358.992919921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 358.959716796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 358.9740295410156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 358.91253662109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 358.7952880859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 358.96356201171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 359.0426940917969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 358.92431640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 359.09259033203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 359.1867980957031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 359.2165832519531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 359.0828857421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 359.03778076171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 358.9818420410156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 358.9898986816406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 358.7764892578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 359.210205078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 358.87860107421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.87225341796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 358.96295166015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 359.2580871582031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 358.8951721191406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 359.1418762207031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 359.00177001953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 358.9454345703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 359.022216796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 358.96636962890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.887939453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.8290710449219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 358.9762878417969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 358.8759765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 358.8944396972656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 358.8400573730469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 358.8976135253906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 359.1012268066406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 358.8722229003906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 358.7604064941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 359.1052551269531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 359.15118408203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 359.01556396484375
############ Running episode number: 677  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.7261657714844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 359.0142517089844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 358.7379455566406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 358.9185791015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 359.0804138183594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 359.06365966796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 359.029296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 358.9466857910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 358.9511413574219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 359.0329284667969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 359.14849853515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 359.0088195800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 358.99114990234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.6309814453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 358.61407470703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 358.9560241699219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 359.0088806152344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 358.88250732421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 358.77911376953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 358.8856506347656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 359.04742431640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 358.79058837890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 358.63690185546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 358.8043518066406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 359.236572265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 358.9159240722656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 358.9296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 359.00518798828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 359.0207824707031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 358.9285888671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 358.95587158203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 359.0975036621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 358.9504699707031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 358.7987365722656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 359.0423278808594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 359.1748962402344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 358.8973693847656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 359.0363464355469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 358.9508056640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 359.0563049316406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 358.988525390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 359.0886535644531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 358.9642639160156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 359.0327453613281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 359.11737060546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 358.9024658203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 359.1513977050781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 358.9349670410156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 358.86236572265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 359.0163879394531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 359.0680847167969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 358.95880126953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 358.91552734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 358.97332763671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 358.8662414550781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 358.9640808105469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 359.0769958496094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 358.8094177246094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 358.9302062988281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 359.0588684082031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 358.9961853027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 358.96258544921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 358.7808532714844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 358.8691101074219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 359.0411071777344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 359.0048522949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 359.1121826171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 359.0998229980469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 359.065673828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 358.9626770019531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 358.8843994140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 358.8363037109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 358.958984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 358.8863220214844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 359.0154113769531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 358.92156982421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 358.869873046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 358.7947998046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.9208068847656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 358.806640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 359.0164794921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 358.8220520019531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 358.9996643066406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 358.935791015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 358.939697265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.8912658691406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 359.0059509277344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.9146728515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 359.02630615234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 358.9244079589844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 359.0535888671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 358.9989929199219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 358.9540710449219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 358.7646484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 359.1925964355469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 359.1632995605469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 358.8735656738281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 358.80706787109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 359.073486328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 358.93585205078125
############ Running episode number: 678  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.8667907714844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 358.9747009277344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 359.04425048828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 359.0225830078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 358.8646240234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 358.83624267578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 359.0828857421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 359.0939025878906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 359.02764892578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 358.8193054199219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 359.0508728027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 358.9272155761719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 358.833740234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.9788818359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 358.9862976074219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 358.8118896484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 358.8570861816406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 359.13372802734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 358.9626770019531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 358.88507080078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 359.0519104003906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 358.9579772949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 359.0264892578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 358.9137878417969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 359.1351318359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 358.7245178222656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 359.23004150390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 358.9993896484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 358.90008544921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 359.0980529785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 358.9735412597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 359.0472106933594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 359.2016906738281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 358.96832275390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 358.8282775878906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 358.9630126953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 359.1192932128906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.9368896484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 358.89178466796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 358.945068359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 358.8489074707031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 358.7795715332031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 359.046630859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 359.0437927246094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 358.8926696777344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 358.9384765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 358.79791259765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 358.94140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 358.7696838378906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 359.0567321777344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 359.0104064941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 358.9959411621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 358.8971252441406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 359.1167907714844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 358.98504638671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 359.3934020996094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 358.9109802246094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 358.9049987792969
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 4 19.0 1238.24711194 (16.871606159345866, 11)
loss 358.7365417480469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 358.8815002441406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 359.0784606933594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 358.8689270019531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 359.0121765136719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 358.9412536621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 359.1136779785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 358.9490966796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 358.9989318847656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 358.79803466796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 358.68609619140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 359.2005310058594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 359.050048828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 358.97686767578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 359.1584167480469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 358.88201904296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 359.0956115722656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 359.15240478515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 358.94927978515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 358.9670104980469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.7431640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 358.79229736328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 359.064453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 358.8677673339844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 358.9837646484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 358.8623046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 358.98834228515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.9308776855469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 358.9381103515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.9183654785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.91546630859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 359.072021484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 358.8399658203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 359.01727294921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 358.96392822265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 358.9922180175781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 358.9331970214844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 358.9927062988281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 359.1436462402344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 359.05718994140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 359.0750427246094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 358.866455078125
############ Running episode number: 679  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.7802429199219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 358.94683837890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 359.0964660644531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 358.7391052246094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 358.822998046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 359.1865539550781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 358.9730224609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 359.01348876953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 358.889404296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 358.94439697265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 359.0719909667969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 359.0391540527344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 359.1736755371094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.7555236816406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 358.8414001464844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 359.09759521484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 358.882080078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 358.7259216308594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 358.97967529296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 358.9344787597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 358.88623046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 359.0539855957031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 358.8642578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 359.0111999511719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 359.07696533203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 358.9798583984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 358.8916931152344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 358.9883117675781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 359.21527099609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 358.9090270996094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 358.87255859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 358.9814147949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 359.0014343261719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 358.78515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 359.06890869140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 358.95782470703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 358.936767578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.9206237792969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 358.8741760253906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 359.07110595703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 359.00823974609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 359.0265808105469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 358.9859313964844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 358.835693359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 359.1163024902344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 358.8896484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 359.105712890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 358.84368896484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 358.8341064453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 358.92572021484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 358.86822509765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 359.12799072265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 358.97869873046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 358.96490478515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 359.11187744140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 358.83148193359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 359.1398620605469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 359.1087646484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 358.5920104980469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 359.00146484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 358.8205871582031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 359.0224609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 358.9289855957031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 359.1561584472656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 358.843994140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 358.9669189453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 359.1294250488281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 358.9551696777344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 358.9535827636719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 359.0419006347656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 358.8925476074219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 359.0828857421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 358.80120849609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 358.9295959472656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 359.12884521484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 359.13983154296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 359.23095703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 358.9520263671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 359.04437255859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 359.1215515136719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 358.8772888183594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 358.89080810546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 358.8982238769531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 358.86663818359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 358.7613220214844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.94647216796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 359.0054016113281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.8955993652344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.8827209472656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 359.0726318359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 359.05035400390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 358.79998779296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 359.0588073730469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 358.68927001953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 359.0271911621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 358.9578857421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 358.99798583984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 359.0848083496094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 359.0369873046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 358.85809326171875
############ Running episode number: 680  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 359.033447265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 358.8204345703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 358.99298095703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 358.8575439453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 359.0679016113281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 358.9849548339844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 358.8919982910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 358.76861572265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 359.1128234863281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 359.0422058105469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 359.0281677246094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 358.9577941894531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 358.9960021972656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.9079284667969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 359.1078186035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 358.8240051269531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 359.08551025390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 358.8459777832031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 359.05889892578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 358.853515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 359.0758972167969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 358.97857666015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 358.7840881347656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 358.99664306640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 359.00860595703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 359.02960205078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 359.0372009277344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 359.00860595703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 358.8025207519531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 359.2491455078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 358.9012451171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 358.8473815917969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 358.9810485839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 359.1076965332031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 358.8827819824219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 359.2242126464844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 358.9574279785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 359.0623474121094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 358.9852600097656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 358.7001037597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 359.0939636230469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 359.0954895019531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 358.7976379394531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 358.8906555175781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 358.9892883300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 359.07061767578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 358.9303283691406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 358.93328857421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 358.8996276855469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 358.9342956542969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 359.1059875488281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 358.7661437988281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 358.97637939453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 359.23687744140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 358.96484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 358.994140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 359.0350036621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 359.09912109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 359.044189453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 358.85113525390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 358.7825622558594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 358.8033142089844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 358.7947998046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 358.820068359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 358.7228088378906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 358.8353271484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 358.8495178222656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 358.91778564453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 358.9172058105469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 359.2156066894531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 358.8420715332031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 358.8595275878906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 358.86572265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 358.97491455078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 359.064208984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 358.7543029785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 358.770263671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 359.0858154296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.7157287597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 359.0223388671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 359.0982666015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 359.0840148925781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 359.01800537109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 358.8223876953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 358.86138916015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.90985107421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 359.0329895019531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 359.066650390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 359.0196228027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 359.2669982910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 359.03924560546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 359.0184326171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 359.0103454589844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 358.970947265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 359.0205993652344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 358.849609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 358.8059387207031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 358.9259338378906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 358.9831237792969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 358.98406982421875
############ Running episode number: 681  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.91400146484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 358.77618408203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 358.97845458984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 358.90301513671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 358.709716796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 358.8949890136719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 358.9927062988281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 358.7957763671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 358.8233947753906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 359.251220703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 358.9563903808594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 359.0274353027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 359.0090637207031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.8501892089844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 358.9032287597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 358.89697265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 358.9791564941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 359.1456604003906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 359.11395263671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 358.7093505859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 358.8895568847656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 359.16302490234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 358.9444580078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 359.0093078613281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 359.1055908203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 358.7803955078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 359.0782775878906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 358.8752136230469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 358.9309387207031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 358.7772216796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 358.96343994140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 358.9261779785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 359.1990051269531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 358.8910827636719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 358.8909606933594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 358.7186279296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 359.00543212890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 359.1014099121094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 359.04339599609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 358.9861755371094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 359.0312194824219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 359.00347900390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 359.02850341796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 358.9794006347656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 359.0294189453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 358.84295654296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 358.96990966796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 358.985595703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 359.1630554199219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 358.8869323730469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 358.72515869140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 358.9412536621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 358.8592834472656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 359.0536193847656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 358.948974609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 358.7862243652344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 358.93341064453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 359.0101318359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 358.8838195800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 358.889404296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 358.93377685546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 358.8788146972656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 358.9631652832031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 358.83917236328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 358.9154052734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 359.0848693847656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 359.2407531738281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 359.0757141113281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 358.77752685546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 358.91839599609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 358.79656982421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 358.92498779296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 358.7962951660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 359.1581726074219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 359.088623046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 358.8515319824219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 358.8772888183594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 358.9068908691406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.9681396484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 359.0721740722656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 358.9881286621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 359.1172790527344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 358.8454284667969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 358.9325866699219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 359.1756591796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.9101867675781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 359.0282287597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.89129638671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.9650573730469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 358.9627380371094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 358.95733642578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 358.93402099609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 359.00323486328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 358.7774353027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 358.9444580078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 359.04931640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 358.9392395019531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 359.17242431640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 358.94976806640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 358.84283447265625
############ Running episode number: 682  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.8321228027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 358.876708984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 359.1368103027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 359.02099609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 359.0577697753906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 359.01824951171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 358.83154296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 358.7928771972656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 358.96929931640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 358.90142822265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 359.0415954589844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 358.94500732421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 358.9101867675781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 359.0505676269531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 359.0843811035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 358.99176025390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 358.82647705078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 359.0004577636719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 358.93316650390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 358.92205810546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 358.9930114746094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 358.8202819824219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 358.8971862792969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 359.1208190917969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 358.8437805175781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 359.0599060058594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 358.8464050292969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 358.9183654785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 358.9620056152344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 358.9418640136719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 359.06439208984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 359.16180419921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 358.9652099609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 358.877197265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 359.1472473144531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 358.87841796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 359.0155029296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.8547058105469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 358.8857421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 359.1568298339844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 358.82293701171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 359.0189208984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 359.0120849609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 358.89703369140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 358.9574279785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 359.0029602050781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 359.08551025390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 358.95526123046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 359.0950622558594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 358.7052307128906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 359.01165771484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 358.8773498535156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 358.8663024902344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 358.9875183105469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 358.9979248046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 358.9161376953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 358.9309997558594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 358.8145446777344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 358.94464111328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 358.6412048339844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 358.8951416015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 359.0513000488281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 359.16412353515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 358.9701843261719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 358.7823181152344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 358.9153137207031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 359.0797119140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 358.8116760253906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 358.976806640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 358.9937744140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 358.9682922363281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 358.9215393066406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 358.8822937011719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 359.1146545410156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 358.9380798339844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 358.84527587890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 358.9035339355469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 358.8873596191406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.9752197265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 358.9307861328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 359.22161865234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 358.8631591796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 359.1986999511719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 358.92486572265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 358.8891906738281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 359.11199951171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 359.0933837890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 359.1421203613281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.9009094238281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 358.85211181640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 358.9325866699219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 359.10546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 359.2032775878906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 359.0064392089844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 358.8919982910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 358.9962463378906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 359.1767578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 359.134521484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 358.9224853515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 358.64154052734375
############ Running episode number: 683  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.94354248046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 359.0031433105469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 359.1119384765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 358.7940368652344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 359.0777893066406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 358.9395751953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 359.1361389160156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 358.89764404296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 358.9054260253906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 358.9786682128906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 358.8865966796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 359.1499328613281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 359.09442138671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.7994384765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 359.04742431640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 358.6697998046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 359.0156555175781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 359.08447265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 358.8870544433594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 358.7687683105469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 358.7824401855469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 358.93865966796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 359.05657958984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 358.9953308105469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 358.9782409667969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 359.11151123046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 359.0871887207031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 359.0694885253906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 359.1156921386719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 358.8231506347656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 358.8847961425781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 358.92596435546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 358.9363098144531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 358.73931884765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 359.0343933105469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 358.8862609863281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 359.0524597167969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.98065185546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 358.8731689453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 358.7952575683594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 358.90118408203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 358.9866027832031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 359.13348388671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 359.001708984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 359.0157775878906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 359.1968994140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 359.1839599609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 358.9545593261719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 359.0380859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 358.93035888671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 358.9947204589844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 358.9515380859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 359.1015930175781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 358.72003173828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 358.9043273925781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 358.6911926269531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 358.9134521484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 359.1179504394531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 359.0464782714844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 358.79754638671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 359.0376892089844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 359.1169738769531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 358.9898681640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 358.9472351074219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 358.9700012207031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 358.9209899902344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 359.0459289550781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 359.0327453613281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 358.7915344238281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 358.97015380859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 358.9562683105469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 358.97186279296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 358.78521728515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 359.0898132324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 358.93951416015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 358.7424011230469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 358.8199157714844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 358.5553894042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.7106628417969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 358.73736572265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 359.0150451660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 358.72637939453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 358.8965148925781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 358.81817626953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 358.9767761230469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 359.007568359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 359.0411071777344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 359.0599060058594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.9962463378906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 358.6792297363281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 358.8362731933594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 358.963134765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 358.86334228515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 359.08831787109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 358.8520812988281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 358.9189758300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 358.9284973144531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 359.239013671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 359.042236328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 358.8643798828125
############ Running episode number: 684  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.8703308105469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 358.9903259277344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 358.8576965332031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 358.97705078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 358.8292236328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 358.8751525878906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 358.7772216796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 358.879638671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 359.08795166015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 358.9380798339844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 359.0732727050781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 358.9017639160156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 358.5583801269531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.90643310546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 359.17767333984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 358.98834228515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 358.92694091796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 359.03118896484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 359.004150390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 358.84796142578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 358.9837646484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 358.8576354980469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 358.96673583984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 358.7229919433594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 358.83721923828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 358.9007873535156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 358.8452453613281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 358.9516296386719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 358.85772705078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 359.0898742675781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 358.99224853515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 358.9185791015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 358.92474365234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 358.7909240722656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 359.0882873535156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 358.8372497558594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 358.9093322753906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.9897766113281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 358.85382080078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 358.5699157714844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 359.00860595703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 359.0452575683594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 359.1202087402344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 358.8898010253906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 359.0194396972656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 358.97125244140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 358.83837890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 359.0634460449219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 359.03265380859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 359.0032958984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 358.97698974609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 359.0323181152344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 358.9356994628906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 359.013916015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 359.0726318359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 358.7265319824219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 358.8263244628906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 358.8868713378906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 358.93768310546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 359.04595947265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 358.87799072265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 358.95623779296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 359.003662109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 358.90728759765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 358.9319763183594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 359.17523193359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 358.9362487792969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 358.89190673828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 358.9797058105469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 359.17218017578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 358.822265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 358.90142822265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 358.77032470703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 358.9102478027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 358.8987121582031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 358.9503479003906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 359.1136169433594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 358.9749450683594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 359.1370544433594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 359.0865783691406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 359.0428161621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 359.08050537109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 359.04583740234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 359.0659484863281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 358.80487060546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.8994445800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 358.8747253417969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.813720703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 359.0441589355469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 359.1969909667969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 358.9093322753906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 359.0919494628906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 358.9432373046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 359.0137634277344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 359.16033935546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 359.0225830078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 358.82635498046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 358.9592590332031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 358.8589172363281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 358.9072570800781
############ Running episode number: 685  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.95599365234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 358.9157409667969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 358.8856506347656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 358.9544677734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 358.9371337890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 358.77801513671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 358.642333984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 359.1182556152344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 358.8490295410156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 359.02801513671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 358.9342041015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 358.8582763671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 359.0299987792969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.98577880859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 359.1879577636719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 358.86810302734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 358.7613220214844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 358.9659423828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 358.9587097167969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 358.9969482421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 359.0065612792969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 358.9869079589844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 359.0765686035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 359.04669189453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 358.9188537597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 358.899169921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 358.74267578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 358.90887451171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 358.9146728515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 358.953369140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 359.0447082519531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 358.8978271484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 359.0189514160156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 358.9215393066406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 359.04241943359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 359.0858154296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 359.0015563964844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.8468933105469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 359.1967468261719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 359.02685546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 358.98089599609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 358.91705322265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 358.8582458496094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 358.91925048828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 359.0361022949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 359.2032470703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 358.8066101074219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 358.9648132324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 358.7848815917969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 358.8448181152344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 358.8457336425781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 359.05072021484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 359.0649719238281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 359.1487731933594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 358.9004821777344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 358.89288330078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 358.84783935546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 358.9739685058594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 358.97027587890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 359.2055969238281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 358.7548828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 358.8843994140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 358.91363525390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 359.130859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 359.0046081542969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 359.0779724121094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 358.9905700683594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 359.00714111328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 359.0311279296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 358.9075012207031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 359.0334167480469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 359.1181640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 358.8574523925781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 359.1683044433594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 358.9928283691406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 358.9776306152344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 359.1916809082031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 359.0166931152344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.9234619140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 358.9083251953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 358.9437561035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 359.1198425292969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 358.9294128417969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 358.97906494140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 358.9066467285156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.98541259765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 359.1297302246094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.7970886230469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.95294189453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 358.9878234863281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 358.84478759765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 358.81793212890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 359.1705627441406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 358.8863525390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 359.0179748535156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 358.91290283203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 359.0856018066406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 358.9573669433594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 359.0982666015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 358.9634094238281
############ Running episode number: 686  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 359.0149841308594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 359.1190185546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 359.0968933105469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 359.0097351074219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 358.733642578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 359.08184814453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 358.8671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 358.8502502441406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 358.93115234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 358.9336853027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 359.0849304199219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 359.0464172363281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 359.0340576171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.937744140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 358.9576721191406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 359.1474609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 358.89825439453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 358.996337890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 358.974365234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 358.93487548828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 359.0086975097656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 358.85791015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 359.0668029785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 358.99951171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 358.9737854003906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 358.6857604980469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 359.0322570800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 358.6408996582031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 359.05511474609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 358.84698486328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 358.9322814941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 358.82879638671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 359.0225830078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 358.79150390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 359.088623046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 358.8153991699219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 358.9247131347656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 359.1118469238281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 358.8105163574219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 358.74212646484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 358.8204345703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 358.9749755859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 359.0594787597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 358.8569030761719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 359.0907897949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 358.810791015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 358.8707275390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 358.98284912109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 358.99853515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 358.93212890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 358.9195861816406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 358.99237060546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 359.2002258300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 358.94189453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 358.86041259765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 358.9740295410156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 358.93670654296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 358.8475036621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 358.9976806640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 358.9586486816406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 358.7919921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 359.24432373046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 359.0412902832031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 359.1517333984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 359.05474853515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 359.1189880371094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 358.8632507324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 358.93218994140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 358.974365234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 358.9180908203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 359.1366882324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 358.92218017578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 358.9307861328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 358.9414978027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 358.8141174316406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 359.03704833984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 359.0517272949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 358.9256591796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.9916076660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 358.9742431640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 359.0396423339844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 358.9907531738281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 358.8459167480469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 359.0686950683594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 358.98046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 359.0182189941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 358.7928161621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.8950500488281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.88336181640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 358.97802734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 359.0767822265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 359.0345458984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 358.9957580566406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 359.1029052734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 359.3359069824219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 359.0815124511719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 358.9150085449219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 358.96905517578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 358.974609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 359.01910400390625
############ Running episode number: 687  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.9482727050781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 358.6277770996094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 359.08502197265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 358.9651794433594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 358.88079833984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 358.8310241699219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 358.99627685546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 359.00244140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 359.0033874511719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 358.9775085449219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 358.8600158691406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 358.7983703613281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 358.8553771972656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 359.0541076660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 359.0191955566406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 359.0559387207031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 358.8762512207031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 358.89666748046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 358.8910827636719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 358.9978332519531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 358.9703369140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 359.0499267578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 358.9259948730469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 358.87103271484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 359.0979919433594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 358.9139099121094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 358.8642883300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 359.0429992675781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 359.0656433105469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 359.03973388671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 358.81097412109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 358.97412109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 358.9416198730469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 358.7131042480469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 358.9620666503906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 358.9993591308594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 359.2765197753906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 359.0754699707031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 359.0942077636719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 358.9118957519531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 358.8369445800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 358.98394775390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 358.80682373046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 358.899169921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 359.0536193847656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 358.8458557128906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 358.8594665527344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 358.8908996582031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 358.94329833984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 358.9786682128906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 359.10009765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 359.0216369628906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 358.86309814453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 359.0538024902344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 358.8663330078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 358.6983947753906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 359.0013732910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 358.9606018066406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 359.07269287109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 358.98016357421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 358.8570861816406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 358.91851806640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 358.9527282714844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 359.1362609863281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 358.8860168457031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 358.9476318359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 358.8457946777344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 359.0425109863281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 358.9955139160156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 358.9190368652344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 358.90142822265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 358.9398498535156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 358.99896240234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 358.82562255859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 358.90496826171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 359.175048828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 358.8253479003906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 358.8743896484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 359.09063720703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 359.1848449707031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 358.7123718261719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 358.9925842285156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 358.8144836425781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 359.08685302734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 359.08416748046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.9411315917969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 358.8605041503906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.86004638671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 359.2508544921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 359.1231689453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 359.0221252441406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 359.0909423828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 358.9488525390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 359.1387939453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 359.00439453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 359.0123596191406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 358.9670104980469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 358.9906311035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 358.9404602050781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 359.01544189453125
############ Running episode number: 688  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.94403076171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 359.04327392578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 358.9944152832031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 358.9761047363281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 358.86346435546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 359.04107666015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 359.0462341308594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 358.88360595703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 359.0302429199219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 359.0767517089844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 358.9413146972656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 359.1499328613281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 358.9389953613281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.7945251464844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 358.8426818847656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 359.3586120605469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 359.0667724609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 359.08428955078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 358.6610107421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 358.98809814453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 358.7529296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 358.8509216308594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 358.8541564941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 358.8125915527344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 358.9259033203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 358.8553466796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 358.9017333984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 358.7885437011719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 358.9140930175781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 358.951904296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 359.1311340332031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 358.76495361328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 358.9341125488281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 358.9010925292969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 358.5849914550781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 358.94793701171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 358.8802795410156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 359.03759765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 359.06939697265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 358.88409423828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 358.7596435546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 359.1226501464844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 358.55767822265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 358.87774658203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 359.0884094238281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 359.01983642578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 358.9188537597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 358.7250061035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 359.1169128417969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 358.906982421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 358.7466735839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 359.05078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 358.8175048828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 358.969482421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 358.95166015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 359.1069641113281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 358.8442687988281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 358.7461853027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 358.8760681152344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 358.7234191894531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 359.0436706542969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 358.69793701171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 358.9351501464844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 358.6617736816406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 358.9195861816406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 358.8492126464844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 358.8846130371094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 358.85400390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 358.79931640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 359.1328430175781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 358.988525390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 359.03094482421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 359.1065368652344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 358.8824768066406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 358.8326416015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 359.08038330078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 358.8778991699219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 359.016357421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.8367614746094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 358.96990966796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 358.9504699707031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 359.02655029296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 358.82867431640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 358.9526672363281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 358.9476318359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 359.015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 358.8367919921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.985595703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 359.0305480957031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 359.09149169921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 358.880615234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 358.98577880859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 358.9707946777344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 358.849365234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 359.0145263671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 358.8554992675781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 358.8645935058594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 358.85443115234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 358.7551574707031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 358.9503479003906
############ Running episode number: 689  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 359.010986328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 358.8211364746094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 359.1666564941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 358.8487548828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 358.9277038574219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 359.013671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 359.01177978515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 358.83709716796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 358.800048828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 358.8737487792969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 358.8997497558594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 359.0814208984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 359.1381530761719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.8742980957031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 359.2785949707031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 358.8575744628906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 358.8175048828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 358.9363098144531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 359.0326843261719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 359.0594787597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 358.9372863769531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 359.0026550292969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 359.0549621582031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 359.02880859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 359.0693054199219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 359.1544494628906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 358.7943115234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 358.8674621582031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 358.9975280761719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 359.19970703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 359.0201416015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 358.86529541015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 359.0896301269531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 358.7265930175781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 358.8105163574219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 359.0320739746094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 358.89385986328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.978515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 359.0335388183594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 358.73284912109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 359.1029052734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 359.1557312011719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 358.9539489746094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 358.9796447753906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 358.8677978515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 359.0479736328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 358.99212646484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 358.92413330078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 359.0657653808594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 358.9537353515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 359.07763671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 359.040283203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 358.8295593261719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 358.9288024902344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 358.9584655761719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 358.9643249511719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 359.0416259765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 358.8706359863281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 359.2428894042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 358.89044189453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 359.1767272949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 358.912109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 358.8594665527344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 358.8382568359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 359.0541076660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 359.0923156738281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 358.8020935058594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 359.0666198730469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 358.8154296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 358.8350830078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 358.8931579589844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 358.7615966796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 359.2195739746094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 359.0772705078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 358.9154968261719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 359.0760192871094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 358.9466552734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 358.7947692871094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.9833068847656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 359.1180114746094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 358.7673645019531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 358.78863525390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 358.8268127441406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 358.9427185058594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 358.9431457519531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 359.1961364746094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 358.90948486328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.9998779296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.95794677734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 358.6558837890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 358.9333801269531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 359.1114196777344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 358.9825439453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 358.97979736328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 358.76226806640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 358.833251953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 359.0013732910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 358.9233093261719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 358.978515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 358.7825927734375
############ Running episode number: 690  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.8262939453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 358.913330078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 359.20306396484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 359.0419006347656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 359.05181884765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 358.6965637207031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 358.9745788574219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 358.96661376953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 358.9512939453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 358.8316650390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 359.0783996582031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 358.96392822265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 358.93731689453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.8816223144531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 359.03515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 358.7411804199219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 358.90814208984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 358.9129638671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 359.1714782714844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 358.9792785644531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 358.9452819824219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 359.008544921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 358.8707275390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 358.8191223144531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 358.8933410644531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 358.78289794921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 358.8752136230469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 358.96197509765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 359.079345703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 359.0487976074219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 359.1546936035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 358.84619140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 358.9586486816406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 359.05181884765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 358.7442932128906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 358.92401123046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 358.961181640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.9989013671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 358.9866638183594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 358.80670166015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 358.87750244140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 359.0978698730469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 358.8984069824219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 358.9649658203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 358.8712463378906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 359.23760986328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 358.8674011230469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 358.8835144042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 358.700439453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 358.99041748046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 358.9134521484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 359.0591125488281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 358.8897705078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 358.6623229980469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 358.8323669433594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 358.8235778808594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 359.0050048828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 358.9048767089844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 358.9764404296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 358.9642028808594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 359.1867370605469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 358.6465759277344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 358.63189697265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 359.0462341308594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 359.0518798828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 358.9706115722656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 359.0435485839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 359.0887451171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 359.0190734863281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 359.0507507324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 358.9560546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 358.6776428222656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 359.1136474609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 359.2043762207031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 358.7726745605469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 359.1178283691406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 358.976318359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 358.9860534667969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.9278259277344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 358.7469787597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 358.99810791015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 359.1770324707031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 359.0646057128906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 358.9283752441406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 358.97979736328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.9218444824219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 358.946533203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.90850830078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.92584228515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 358.97894287109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 359.16546630859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 359.0230407714844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 359.001708984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 358.8526306152344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 358.85443115234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 358.8365173339844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 359.0406188964844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 358.8646545410156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 358.9199523925781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 358.8918151855469
############ Running episode number: 691  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.87646484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 359.078369140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 359.1011657714844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 358.79718017578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 359.2009582519531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 359.00250244140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 359.0316467285156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 358.82427978515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 358.9676208496094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 359.3383483886719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 358.8500061035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 359.0154724121094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 358.8581237792969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 359.0284729003906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 359.2929382324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 358.80560302734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 358.9938049316406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 358.9597473144531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 359.0597229003906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 359.1406555175781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 358.8501281738281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 359.2137451171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 359.027099609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 358.9451904296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 359.0459899902344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 358.96875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 359.06732177734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 358.8398132324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 358.9242858886719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 358.9599304199219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 358.7223815917969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 359.069091796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 358.99041748046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 359.1391296386719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 358.8195495605469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 358.8499450683594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 358.9771728515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.8629150390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 358.8053283691406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 358.91168212890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 358.86956787109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 358.9687805175781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 358.6970520019531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 358.91595458984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 359.1062927246094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 358.7235107421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 358.9020690917969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 358.8594665527344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 358.95758056640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 358.9364013671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 358.79681396484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 359.0849914550781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 358.9317626953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 359.0069274902344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 359.074462890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 358.9316711425781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 358.93011474609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 358.9740905761719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 358.8797607421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 358.8780822753906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 358.9273986816406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 359.01751708984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 359.0740051269531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 359.17059326171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 358.9393005371094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 358.95855712890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 359.0312194824219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 358.9513244628906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 358.9766845703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 358.8254089355469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 358.7853088378906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 359.00860595703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 359.06549072265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 359.0894470214844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 359.0543518066406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 358.6596374511719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 358.78973388671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 359.2024230957031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.9745178222656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 358.8551940917969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 359.07177734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 358.8819274902344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 358.97857666015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 358.8973083496094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 358.7799072265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.5530090332031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 358.994384765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.9757995605469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.77825927734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 358.86248779296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 358.9344787597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 359.22125244140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 358.7544860839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 358.8916931152344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 359.02923583984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 358.9876708984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 359.19146728515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 359.0998229980469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 359.04205322265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 358.88519287109375
############ Running episode number: 692  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.8558044433594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 358.9057922363281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 359.27001953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 358.9847106933594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 358.95806884765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 358.900146484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 358.72137451171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 358.7675476074219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 358.87646484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 359.0458984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 358.9370422363281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 358.9939270019531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 358.98199462890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.86395263671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 358.9424743652344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 359.1527099609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 359.0749816894531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 358.79022216796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 358.7218017578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 359.2127685546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 358.864501953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 359.015869140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 358.89141845703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 358.8130798339844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 358.6432800292969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 358.69012451171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 359.02008056640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 358.7174377441406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 358.9273376464844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 358.9735412597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 358.9748229980469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 358.98297119140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 358.8902282714844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 358.79376220703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 358.8279113769531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 358.8820495605469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 358.98291015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.982177734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 359.1033630371094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 358.9421081542969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 358.76995849609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 358.8589172363281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 358.85687255859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 358.9627380371094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 358.9463195800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 358.8778381347656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 358.8953552246094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 358.96148681640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 358.6666564941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 359.07977294921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 358.79925537109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 358.8812561035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 358.7118835449219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 359.0749816894531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 359.1419677734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 358.9540710449219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 359.1388854980469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 358.7839660644531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 358.9223327636719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 358.97186279296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 358.8579406738281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 359.2820129394531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 359.00897216796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 358.84661865234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 358.9438171386719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 358.9849853515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 358.89404296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 358.9830017089844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 358.85491943359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 359.0084228515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 358.63983154296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 358.67669677734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 358.7527770996094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 359.00286865234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 358.90814208984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 358.9555358886719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 358.9595642089844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 359.2696228027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.8411865234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 358.6489562988281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 359.0327453613281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 359.1752014160156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 358.8660888671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 358.9738464355469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 358.7514343261719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 359.10076904296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 359.04217529296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 359.0981750488281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.9001770019531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 359.0621337890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 358.83807373046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 359.1089172363281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 358.9741516113281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 358.9793701171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 358.9759521484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 358.8638916015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 358.89794921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 358.9268493652344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 359.14990234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 358.71795654296875
############ Running episode number: 693  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 359.06854248046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 359.0884094238281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 358.8648681640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 359.0956115722656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 359.0001525878906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 358.6819763183594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 358.9457702636719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 358.94708251953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 358.9355773925781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 359.0814208984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 358.805419921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 359.0255126953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 358.85723876953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 359.08978271484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 359.05419921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 359.04876708984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 358.9730224609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 359.03228759765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 358.94775390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 359.1153869628906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 359.0165100097656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 359.1410827636719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 358.965087890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 358.8448791503906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 359.1628112792969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 358.7726135253906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 359.1531677246094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 358.873291015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 358.72052001953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 358.9716491699219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 358.77874755859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 358.9706115722656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 358.7890930175781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 358.98681640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 358.97332763671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 358.7527160644531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 359.0877380371094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.8019104003906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 358.7911682128906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 358.9392395019531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 358.9576721191406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 359.1352233886719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 359.0690612792969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 358.90960693359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 359.049072265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 358.7394104003906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 359.0519714355469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 358.8769836425781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 358.9912109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 358.93560791015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 359.2230224609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 358.8914794921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 358.99908447265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 358.97698974609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 358.8988037109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 358.9911193847656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 358.98699951171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 358.8466491699219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 358.92022705078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 359.0732116699219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 358.8251037597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 359.01904296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 358.9571228027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 358.9636535644531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 359.1202392578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 359.1935119628906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 358.5718078613281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 358.8251037597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 359.05596923828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 358.984619140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 358.7238464355469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 358.82733154296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 358.6678466796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 359.0465087890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 359.1246643066406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 358.8853759765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 358.79083251953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 358.9671630859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.9785461425781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 358.9879150390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 358.7723083496094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 358.9759521484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 359.1194152832031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 358.7774353027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 358.9245300292969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 359.0408935546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 359.2134704589844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.8904113769531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.7956848144531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 359.1651916503906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 358.7426452636719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 359.00030517578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 359.1346435546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 358.95440673828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 359.114990234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 358.85089111328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 359.1009216308594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 358.70672607421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 359.2119140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 358.793212890625
############ Running episode number: 694  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.8688049316406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 359.21795654296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 359.16192626953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 358.8080749511719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 358.912353515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 358.7012634277344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 359.0269775390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 358.8612976074219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 358.9727478027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 358.8634948730469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 359.0549621582031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 358.90838623046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 358.901123046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.8246765136719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 358.7781677246094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 358.7351379394531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 358.87469482421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 358.9118347167969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 359.00775146484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 359.1861572265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 359.0054626464844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 358.9031677246094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 358.8227233886719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 359.07049560546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 359.13525390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 358.7212829589844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 358.6824645996094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 358.8594665527344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 358.85223388671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 358.98504638671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 358.9667663574219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 358.9737854003906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 358.86029052734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 359.1160888671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 358.8751525878906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 358.6961364746094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 358.98260498046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.89984130859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 359.10186767578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 358.7507629394531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 358.87750244140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 359.01763916015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 359.0896911621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 358.9803466796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 359.1747741699219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 358.9486999511719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 359.23541259765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 358.9687805175781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 358.7270202636719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 359.2154846191406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 359.10968017578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 359.0754089355469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 358.8857421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 358.7978210449219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 358.8439025878906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 359.2490539550781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 359.2403869628906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 358.816650390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 358.9179992675781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 359.0008239746094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 358.9253234863281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 359.01837158203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 359.03509521484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 358.8755798339844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 358.9771423339844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 359.0068664550781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 359.0484619140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 358.9848327636719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 358.98406982421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 358.8160095214844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 358.797607421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 358.90185546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 358.80255126953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 359.0636901855469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 358.7736511230469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 358.89251708984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 359.0245056152344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 358.87054443359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.91259765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 359.04461669921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 358.99383544921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 358.8450012207031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 359.15789794921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 358.9241638183594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 359.0387268066406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.8216857910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 358.9615783691406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 359.1126708984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 359.0164489746094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 358.8692932128906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 358.9154052734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 358.87591552734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 358.7352600097656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 359.0231628417969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 358.81231689453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 358.9971008300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 359.1387023925781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 359.0694580078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 358.9427185058594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 358.9923095703125
############ Running episode number: 695  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 359.066162109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 358.9736633300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 359.23858642578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 358.8916015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 359.06494140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 358.7122802734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 359.02154541015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 358.9666442871094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 358.8812255859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 358.90521240234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 358.7983093261719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 358.8782958984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 358.9436950683594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.6385498046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 358.9933776855469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 358.79180908203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 359.0180358886719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 358.82330322265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 358.963134765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 359.1396484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 358.9143371582031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 358.8670349121094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 358.8703308105469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 358.79644775390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 359.1636657714844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 358.8459777832031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 359.0545959472656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 358.969482421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 359.060302734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 358.9874572753906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 358.8646545410156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 359.0522155761719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 359.1611633300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 359.0571594238281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 358.7418518066406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 359.0009460449219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 358.85205078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 359.0598449707031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 359.0972595214844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 358.8348083496094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 359.15631103515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 358.7926025390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 358.9595031738281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 359.06829833984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 359.0877685546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 358.9818420410156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 358.6681823730469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 358.8246154785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 358.9400939941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 359.09381103515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 359.02862548828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 358.86248779296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 358.9260559082031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 359.0690612792969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 359.1595458984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 358.82171630859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 358.8365783691406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 359.09393310546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 358.97796630859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 358.8349914550781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 359.03302001953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 358.8172912597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 358.971923828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 358.96661376953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 358.8712463378906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 358.7796630859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 358.8195495605469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 358.9289245605469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 358.81805419921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 359.02655029296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 358.91546630859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 359.16387939453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 359.2244567871094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 358.8820495605469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 358.8655700683594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 358.775390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 358.97381591796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 358.7602844238281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.7539367675781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 358.926513671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 358.9412841796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 358.9821472167969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 359.1157531738281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 358.87744140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 358.8560485839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.93096923828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 358.926513671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.9984130859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 359.05224609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 359.0160827636719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 358.9314270019531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 358.7921142578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 359.12835693359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 358.8831481933594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 358.9580383300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 358.85321044921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 358.804931640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 358.7442321777344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 358.85406494140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 358.71270751953125
############ Running episode number: 696  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 359.07769775390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 358.8716735839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 358.96453857421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 359.001953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 358.7718200683594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 358.85736083984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 358.7677917480469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 359.0067138671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 358.8793029785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 358.69775390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 358.7402038574219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 358.8897705078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 358.816162109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 359.0966491699219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 358.90704345703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 359.0287780761719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 359.1400451660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 358.8910827636719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 358.79779052734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 359.07666015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 358.87371826171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 358.7889709472656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 359.1024475097656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 358.7967224121094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 359.0016784667969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 358.91033935546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 358.7728576660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 358.9325866699219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 358.890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 358.7783508300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 359.1015319824219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 359.02606201171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 359.032470703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 358.5920715332031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 359.0030212402344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 358.9032897949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 358.95318603515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.88800048828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 358.8555908203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 359.1664733886719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 359.0052185058594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 359.13385009765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 358.9002380371094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 359.20501708984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 358.79071044921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 358.6980285644531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 359.05267333984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 359.01446533203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 359.15179443359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 359.06695556640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 358.6571350097656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 359.1173095703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 358.9306945800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 358.8645324707031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 359.09613037109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 358.8250732421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 358.9404602050781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 358.99896240234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 359.01080322265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 358.9263000488281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 358.8006286621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 359.1053161621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 358.8867492675781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 358.7685241699219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 358.8435363769531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 358.9739685058594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 359.0150146484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 359.03387451171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 359.0966491699219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 359.10040283203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 359.07537841796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 358.83489990234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 358.94549560546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 358.8138732910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 358.99603271484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 358.9136657714844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 359.0098876953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 359.0216369628906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.6445007324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 359.1033630371094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 359.06317138671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 358.9179992675781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 359.01214599609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 358.9026184082031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 358.894775390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 359.0998840332031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 359.0941162109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 359.1031799316406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.9501953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 359.08099365234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 358.9120178222656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 358.9869079589844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 358.952392578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 359.0029602050781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 358.9389343261719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 358.9657897949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 359.09271240234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 359.00042724609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 358.8807373046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 358.9424743652344
############ Running episode number: 697  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.9237060546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 358.91546630859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 359.0033264160156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 358.9452209472656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 358.9945373535156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 358.9792785644531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 359.1094970703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 359.21551513671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 359.05474853515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 359.0187683105469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 359.001708984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 359.1253662109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 358.7640380859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.9663391113281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 359.10198974609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 358.90435791015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 359.11602783203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 359.01971435546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 358.9562072753906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 358.9319152832031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 358.6199035644531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 358.787109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 358.85882568359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 359.02349853515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 358.9673156738281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 358.85919189453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 358.9873046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 358.87554931640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 359.0886535644531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 359.0668640136719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 358.9715881347656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 358.9755554199219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 358.7157287597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 359.1466979980469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 358.9129638671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 358.9197082519531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 358.7633972167969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.9098815917969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 358.7963562011719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 359.0935363769531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 359.0322570800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 358.890380859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 359.1394958496094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 359.0551452636719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 358.9990234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 358.8885498046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 358.77435302734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 359.0646667480469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 358.794189453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 358.85662841796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 359.0303955078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 358.7215576171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 359.1849670410156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 359.0605163574219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 359.0723876953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 359.07171630859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 359.07171630859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 359.01123046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 358.9706726074219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 358.8923645019531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 358.97967529296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 359.02978515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 358.95294189453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 359.0796813964844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 358.78924560546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 358.9545593261719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 359.1436767578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 359.04766845703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 358.9645080566406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 359.0683288574219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 358.7930908203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 358.91351318359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 359.0555114746094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 358.9341125488281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 358.85040283203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 359.00323486328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 358.8882141113281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 358.91912841796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 359.08953857421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 358.86077880859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 359.19647216796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 359.1593933105469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 359.1255798339844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 359.0874938964844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 358.9942626953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.8232727050781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 359.06695556640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.9710693359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.9917907714844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 358.746337890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 359.1021728515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 359.21453857421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 358.9168701171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 359.0609436035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 359.0268859863281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 358.9492492675781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 359.0048522949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 358.8610534667969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 359.10205078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 358.9458923339844
############ Running episode number: 698  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 359.0899963378906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 358.9265441894531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 359.1407775878906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 359.08502197265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 358.9755859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 359.1630554199219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 358.9842224121094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 358.7594299316406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 358.937255859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 358.98760986328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 358.8021545410156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 358.9495544433594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 358.7638854980469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.7646179199219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 358.8697204589844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 358.9713134765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 358.7068176269531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 358.7467041015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 359.19378662109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 358.9701843261719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 358.9467468261719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 359.13897705078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 359.3738098144531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 358.9389343261719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 359.0902404785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 358.95703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 358.934326171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 358.9240417480469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 358.8089294433594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 358.7801513671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 358.75433349609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 359.119140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 358.8796691894531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 358.9826354980469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 358.9255676269531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 358.81427001953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 359.0257263183594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 359.0904541015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 359.0518798828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 358.8694763183594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 358.9242248535156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 358.87286376953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 358.9686279296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 359.1935119628906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 358.85626220703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 359.1544494628906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 358.7787780761719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 358.7254943847656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 358.99932861328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 358.761962890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 358.8772277832031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 358.9222717285156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 358.88287353515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 358.8832702636719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 358.87646484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 358.9193420410156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 358.9971008300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 359.0209045410156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 359.0609436035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 359.0572509765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 358.8693542480469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 359.0315246582031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 358.95001220703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 358.9163513183594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 359.1836242675781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 358.97503662109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 359.1058349609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 359.13165283203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 358.88165283203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 358.9154968261719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 359.0561218261719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 359.0557861328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 358.9892883300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 359.1356201171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 359.0574951171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 358.9242248535156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 359.2302551269531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 358.671142578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.9029541015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 358.9181213378906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 358.9382629394531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 358.9999694824219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 358.9083251953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 359.22271728515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 359.02301025390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 359.0325927734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 358.9089050292969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 359.0194091796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.9616394042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 358.9402770996094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 358.7740783691406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 358.786376953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 359.10650634765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 358.7923278808594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 358.99603271484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 359.00250244140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 358.951416015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 358.89544677734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 358.94146728515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 358.7868957519531
############ Running episode number: 699  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.8834228515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 358.6863708496094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 358.85968017578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 359.08477783203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 359.1431579589844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 358.70343017578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 359.19110107421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 358.8698425292969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 358.9214172363281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 359.1103820800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 358.9172668457031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 359.0673828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 358.87744140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 359.0721130371094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 359.04107666015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 358.9985656738281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 359.0521240234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 359.0503845214844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 358.8725280761719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 359.0390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 359.1246337890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 359.0567626953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 358.9368896484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 359.171142578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 359.0231018066406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 358.7460632324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 358.8713073730469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 359.0153503417969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 358.9527893066406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 359.152587890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 358.88055419921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 359.1731872558594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 358.8846740722656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 358.8427429199219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 358.72467041015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 358.96453857421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 359.0361633300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 359.13592529296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 359.11334228515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 358.9591064453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 358.9031677246094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 359.0848388671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 358.8878479003906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 358.91546630859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 358.9638366699219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 358.8118591308594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 359.15155029296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 359.0245056152344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 358.9040222167969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 359.14752197265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 359.1074523925781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 358.75860595703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 358.9104919433594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 359.1231384277344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 358.99029541015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 358.9209289550781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 359.0531005859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 359.1725158691406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 359.0556945800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 358.8232116699219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 358.9657897949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 358.75628662109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 359.2347717285156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 359.1354675292969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 358.7908020019531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 359.28106689453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 358.7815246582031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 358.98431396484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 359.0299987792969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 359.0549621582031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 358.9380798339844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 358.65374755859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 358.8476867675781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 359.12091064453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 359.1617736816406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 358.8348083496094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 358.7267150878906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 358.8594665527344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 359.1492004394531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 358.91937255859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 358.7907409667969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 358.9740295410156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 358.9743957519531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 359.0558166503906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 358.8471374511719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.9593200683594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 358.7718505859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.98040771484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 359.0086364746094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 359.0510559082031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 358.9581298828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 358.8570861816406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 359.00360107421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 358.8785705566406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 358.961669921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 359.0658874511719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 358.9150695800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 358.99859619140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 358.9151916503906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 358.71173095703125
############ Running episode number: 700  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.985595703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 358.97113037109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 359.1465148925781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 358.8822021484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 359.11676025390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 358.97344970703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 358.86407470703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 358.9978332519531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 359.0292053222656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 359.0080261230469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 358.9840087890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 358.9775695800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 358.8216857910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 359.005126953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 358.9902038574219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 358.8857727050781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 358.9686584472656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 359.1951904296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 359.052978515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 359.1233215332031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 359.0774230957031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 359.0597229003906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 358.8955993652344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 358.8211669921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 358.9638366699219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 358.8191833496094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 358.952392578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 358.9563293457031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 358.6758728027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 359.0291442871094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 359.0149230957031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 358.7763671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 358.8831787109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 358.9195556640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 358.76513671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 358.9967956542969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 359.0976257324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.97283935546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 358.9776611328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 359.1119384765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 359.0528259277344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 359.0631103515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 358.90179443359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 358.97711181640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 358.8891906738281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 358.97802734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 358.8248291015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 358.8841857910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 359.0816345214844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 358.990966796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 359.0080261230469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 359.0487365722656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 358.8099670410156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 359.03338623046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 359.02099609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 358.6459655761719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 359.0690002441406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 358.72662353515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 358.93212890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 358.9547119140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 359.1744689941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 358.9754943847656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 358.7743225097656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 359.1299133300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 358.8333740234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 358.8229064941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 358.99713134765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 359.0357971191406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 358.9990539550781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 358.8896789550781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 358.89044189453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 359.0143737792969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 358.8948669433594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 358.8521423339844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 359.1492614746094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 358.7371520996094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 358.7875061035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 358.9305419921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 359.0252685546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 359.04974365234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 358.7846374511719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 358.61334228515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 358.8190002441406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 358.92266845703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 358.9730529785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.7220458984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 358.988525390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.89337158203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.94635009765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 358.8996276855469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 358.89520263671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 359.0667724609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 359.0076904296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 358.9402770996094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 359.0711975097656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 358.8196716308594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 358.91033935546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 358.8549499511719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 358.7494201660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 358.9593505859375
############ Running episode number: 701  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.9226989746094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 358.6560974121094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 359.0671081542969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 359.0397033691406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 358.85235595703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 358.9998779296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 359.2873840332031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 358.8615417480469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 359.0658264160156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 358.9623718261719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 359.07489013671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 358.9775390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 358.9172668457031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.92413330078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 358.9266662597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 359.06085205078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 358.7737731933594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 358.8319396972656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 358.7536926269531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 359.0284423828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 358.9845886230469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 358.8225402832031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 358.7411804199219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 359.0976867675781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 358.7599792480469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 358.6443786621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 358.85699462890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 359.0786437988281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 359.203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 358.8828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 358.78533935546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 358.91424560546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 358.90631103515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 359.1097717285156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 358.9861145019531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 358.7510681152344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 359.0710144042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.90887451171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 358.86077880859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 359.0447998046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 359.08306884765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 358.91839599609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 359.0662841796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 359.0121765136719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 358.9987487792969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 358.854248046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 359.0415954589844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 359.0464172363281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 358.90911865234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 359.1679382324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 358.7520751953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 359.03497314453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 358.7337646484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 358.9608154296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 359.02838134765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 358.800048828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 359.0277099609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 359.1237487792969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 359.00927734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 358.98358154296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 358.9591369628906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 359.07952880859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 358.789794921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 358.8785705566406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 358.99774169921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 358.9751281738281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 359.0989685058594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 358.9380187988281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 358.7363586425781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 358.8692321777344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 358.9976501464844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 358.8228759765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 358.8798522949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 358.8061218261719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 358.8885498046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 359.1557922363281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 359.0293884277344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 358.6918029785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.9217834472656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 358.7557067871094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 359.12139892578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 359.0120544433594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 358.91375732421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 358.81103515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 358.95361328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 359.04888916015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 359.17059326171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 359.0552673339844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.9024658203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 358.80511474609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 359.080810546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 358.7517395019531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 359.0523986816406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 359.0120544433594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 358.82391357421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 358.73052978515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 359.009521484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 358.6739196777344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 358.9223937988281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 358.818359375
############ Running episode number: 702  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.75335693359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 359.0575256347656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 359.0574035644531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 358.8927307128906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 358.9857482910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 359.18914794921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 358.86700439453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 359.06793212890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 358.8437194824219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 358.9139709472656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 359.0213317871094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 359.1331787109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 358.99346923828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.9747314453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 358.7690124511719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 359.0666809082031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 358.92437744140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 359.1067199707031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 358.9833679199219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 358.8209533691406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 358.9465637207031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 359.0069274902344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 359.0600891113281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 358.73321533203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 358.9147644042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 359.1212158203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 358.9590759277344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 359.02178955078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 358.9697570800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 359.06866455078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 359.22039794921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 358.88037109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 359.0141296386719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 358.73211669921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 359.1275634765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 358.8820495605469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 358.5650634765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.7087097167969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 358.7161865234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 358.9081726074219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 359.0298767089844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 359.0028381347656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 358.93450927734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 358.80755615234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 358.8429260253906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 358.81158447265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 358.93231201171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 359.00390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 359.0533142089844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 359.00396728515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 358.872802734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 359.12835693359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 359.0849304199219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 359.0700988769531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 359.1130676269531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 358.87445068359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 359.1902770996094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 358.77130126953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 359.0115966796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 358.87799072265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 358.7184753417969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 358.81646728515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 358.99847412109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 358.82904052734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 358.79791259765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 359.0980529785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 358.83172607421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 358.9339599609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 358.711181640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 359.03924560546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 359.1050720214844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 358.990234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 358.9417724609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 359.1102294921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 359.0921936035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 358.92388916015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 359.0707702636719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 359.0449523925781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 359.13763427734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 359.02471923828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 359.0105895996094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 358.98858642578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 359.03179931640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 358.9962158203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 359.02447509765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 359.0146484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 358.8979187011719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 359.136962890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 359.0001220703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 358.9862976074219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 358.9716491699219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 359.09942626953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 358.92938232421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 358.85968017578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 359.24786376953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 358.9792785644531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 359.1387634277344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 358.97845458984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 358.9392395019531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 359.0166015625
############ Running episode number: 703  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.9494323730469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 358.9657287597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 358.72967529296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 359.069580078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 359.1767883300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 359.09429931640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 358.828369140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 359.0704345703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 359.0556640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 359.0242004394531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 359.10516357421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 359.1451416015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 358.8428649902344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 359.2462158203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 359.08111572265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 359.0298767089844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 358.97711181640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 359.1559143066406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 358.94647216796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 359.1247253417969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 359.0702819824219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 359.0271301269531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 358.9669494628906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 358.7628173828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 358.9137268066406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 358.9773864746094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 358.9565734863281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 359.0136413574219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 358.7791748046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 358.966064453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 359.1519775390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 359.1184997558594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 358.9391174316406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 358.7447204589844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 359.13775634765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 358.69610595703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 358.9180603027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.95465087890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 359.1112976074219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 358.9019470214844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 358.9355163574219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 358.87042236328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 359.21331787109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 358.90155029296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 358.8601989746094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 358.8653869628906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 358.96270751953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 358.9350280761719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 358.9444580078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 359.0494689941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 358.842529296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 359.0037841796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 359.17218017578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 358.69293212890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 359.0556640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 359.0810852050781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 358.9820861816406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 358.93475341796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 358.90771484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 358.8837585449219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 359.0909423828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 359.0174255371094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 359.10028076171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 358.78875732421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 359.0471496582031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 358.8431091308594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 358.7241516113281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 358.7860107421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 358.9476013183594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 358.8692321777344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 358.8553466796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 359.11199951171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 359.08966064453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 358.87933349609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 359.0733642578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 358.8996887207031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 359.065185546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 358.9209289550781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.7289733886719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 358.96722412109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 359.0018005371094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 358.79071044921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 358.90362548828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 358.912841796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 359.1641540527344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.7949523925781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 358.826416015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.9495544433594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.8362121582031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 358.9812316894531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 358.9687805175781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 358.97503662109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 359.0596008300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 358.8634948730469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 358.81451416015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 358.8739013671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 358.7700500488281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 358.9522705078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 359.19537353515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 358.91119384765625
############ Running episode number: 704  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.9375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 359.0133056640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 359.0351867675781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 359.02294921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 359.02935791015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 359.036865234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 358.75238037109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 359.0046691894531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 359.0022277832031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 358.8966369628906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 359.031982421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 358.94305419921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 359.02703857421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.6875305175781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 358.7807922363281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 359.01910400390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 359.1477355957031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 358.969970703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 358.892822265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 359.00433349609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 358.98345947265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 358.8945617675781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 358.9994812011719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 358.69415283203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 358.8908996582031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 358.6162414550781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 359.1038818359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 358.8170471191406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 358.9801940917969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 358.8526306152344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 359.0709533691406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 359.02978515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 358.9873352050781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 359.141845703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 359.0732116699219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 358.9395751953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 358.70123291015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 359.0563049316406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 358.7644958496094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 358.804931640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 358.9359436035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 358.88604736328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 358.8437805175781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 358.9242248535156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 358.8237609863281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 359.21502685546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 359.0762023925781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 359.17254638671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 359.05438232421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 358.8753662109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 359.2212829589844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 358.8783874511719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 358.8210754394531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 358.802490234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 359.01422119140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 359.0255126953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 359.109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 358.8971252441406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 359.1363525390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 358.7618713378906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 358.992431640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 358.8650817871094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 358.9327392578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 358.9761657714844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 359.027099609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 358.99737548828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 359.26019287109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 358.95111083984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 358.8942565917969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 359.00164794921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 358.94268798828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 358.85546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 358.94671630859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 358.9923095703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 359.0757141113281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 359.0783996582031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 358.9991760253906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 358.94891357421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.8827209472656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 359.06146240234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 358.8359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 358.9468078613281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 358.9949951171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 359.089111328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 358.96063232421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.96832275390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 358.9354248046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.9969787597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 359.01861572265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 358.9476318359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 359.00750732421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 358.7940979003906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 359.0633544921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 358.9455871582031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 358.96112060546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 359.04498291015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 359.03961181640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 358.9620056152344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 358.97076416015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 358.92083740234375
############ Running episode number: 705  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.9692077636719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 359.0040283203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 358.8558349609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 359.01446533203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 358.9819030761719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 359.00201416015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 359.0891418457031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 358.9033203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 358.7972412109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 358.94537353515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 358.9129638671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 358.7640380859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 358.9539489746094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 359.0673828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 358.9980163574219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 358.90582275390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 358.730712890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 359.148193359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 358.49609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 359.0760803222656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 359.1043701171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 359.0421447753906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 359.0207824707031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 358.8569030761719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 359.30267333984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 358.9560546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 359.16314697265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 359.0007629394531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 358.90985107421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 358.9776916503906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 358.9391174316406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 358.9055480957031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 358.9698181152344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 359.0362548828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 359.01885986328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 359.050537109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 358.93328857421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.9237060546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 358.90673828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 358.9759826660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 359.1411437988281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 358.732177734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 358.7240905761719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 358.6973571777344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 359.13580322265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 358.687255859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 359.076416015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 359.0683288574219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 358.9803466796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 359.00146484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 358.80670166015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 359.0275573730469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 359.3458557128906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 358.9046936035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 358.9528503417969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 359.0269775390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 359.1736145019531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 358.9211730957031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 358.84979248046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 358.944580078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 359.03021240234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 358.71923828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 359.03924560546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 358.9029541015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 358.99468994140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 358.9600830078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 358.7265930175781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 358.9200439453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 359.1128845214844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 358.9600524902344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 359.090087890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 358.8384704589844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 359.0478515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 359.0245361328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 358.85174560546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 358.9944152832031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 359.1149597167969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 359.0538635253906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.89312744140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 358.8779602050781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 358.7934875488281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 359.08563232421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 359.0083923339844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 358.8653564453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 358.776611328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.6343994140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 358.6544189453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 359.09075927734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.9278259277344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 359.0617370605469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 359.0612487792969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 358.8705139160156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 359.005615234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 359.0804443359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 358.91461181640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 359.222412109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 359.0479736328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 359.0047302246094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 359.06402587890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 358.7465515136719
############ Running episode number: 706  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.96759033203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 358.85235595703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 359.066650390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 359.1641540527344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 359.0517272949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 359.1096496582031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 358.8726806640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 358.7251281738281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 359.0028076171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 358.8554382324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 358.842041015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 359.0384216308594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 358.96673583984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 359.0146484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 358.84991455078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 358.9278259277344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 358.8572998046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 358.9798889160156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 358.7386779785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 359.11181640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 359.0163879394531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 359.0058898925781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 358.9098815917969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 358.8589172363281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 359.02789306640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 358.9373779296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 358.9831848144531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 358.987060546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 359.1280822753906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 359.0343322753906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 358.8486022949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 358.8726501464844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 358.8897705078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 359.17523193359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 359.1282043457031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 358.9515686035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 359.25897216796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.86077880859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 359.0601501464844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 359.06939697265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 358.7585144042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 359.15216064453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 358.97747802734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 358.94927978515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 358.82928466796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 359.1062927246094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 359.133056640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 359.0186462402344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 358.799072265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 359.3293151855469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 358.8557434082031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 358.9620361328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 359.1794128417969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 359.1473388671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 358.8846130371094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 359.00634765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 358.9892883300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 359.0116271972656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 358.9179992675781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 358.989013671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 359.04376220703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 358.9704284667969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 358.99346923828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 359.0574951171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 359.013427734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 359.0782775878906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 358.8714294433594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 358.8638916015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 358.9302062988281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 358.9754638671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 359.16748046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 359.0342712402344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 358.8480529785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 359.0791015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 359.0649719238281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 358.74908447265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 359.0013122558594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 359.10565185546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.69964599609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 359.0393371582031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 359.1081848144531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 359.20770263671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 358.8783874511719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 359.04803466796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 358.6577453613281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.9392395019531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 359.1241149902344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.9052429199219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.6895446777344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 358.9740905761719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 359.0441589355469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 358.9098815917969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 358.781494140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 358.94720458984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 359.1327209472656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 359.0834045410156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 358.995849609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 359.10479736328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 359.0037536621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 358.889892578125
############ Running episode number: 707  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.8935852050781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 358.8166809082031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 359.1861267089844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 359.02392578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 358.9260559082031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 359.077392578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 358.87689208984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 358.9423828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 358.99676513671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 358.8912353515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 358.7485046386719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 359.00848388671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 358.9482116699219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.7810974121094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 358.97161865234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 358.6209411621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 359.1098327636719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 359.1299743652344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 358.9381103515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 358.9532470703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 359.0238037109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 358.8332824707031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 359.0528564453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 359.03765869140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 358.9301452636719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 359.0785827636719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 358.9775390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 358.7431945800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 359.1969909667969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 359.0386047363281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 359.2076110839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 358.9897155761719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 359.0499267578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 358.972412109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 358.8467102050781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 358.91900634765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 359.02154541015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.87164306640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 359.01519775390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 358.9127197265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 359.0020751953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 359.08953857421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 358.9596862792969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 358.88311767578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 358.92596435546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 358.9682922363281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 358.80706787109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 358.9815368652344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 358.96783447265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 359.1053161621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 358.9303283691406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 359.0940246582031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 358.9182434082031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 358.83868408203125
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 4 19.0 1122.88439768 (14.677479537099185, 11)
loss 358.9948425292969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 358.7720031738281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 359.1772155761719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 358.8260803222656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 358.8123474121094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 358.93524169921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 358.96087646484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 359.06134033203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 359.0227355957031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 359.0049133300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 359.00335693359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 358.86871337890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 358.8802185058594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 358.85296630859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 358.980224609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 358.8867492675781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 358.9185485839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 358.9415588378906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 359.0694580078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 358.93524169921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 358.8329772949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 358.9241638183594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 358.89276123046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 358.9586181640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.8504333496094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 358.9172668457031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 359.1274108886719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 358.7890930175781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 358.97698974609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 358.852783203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 359.0421447753906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.8302001953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 359.0470275878906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.6895446777344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 359.1250305175781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 358.84332275390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 359.0075988769531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 359.0010070800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 359.0091857910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 359.00848388671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 359.0881042480469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 358.9506530761719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 358.99169921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 358.8423767089844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 358.97930908203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 358.9220275878906
############ Running episode number: 708  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.9503173828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 358.96368408203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 359.1108703613281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 358.8730773925781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 359.03106689453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 358.8714294433594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 358.9671936035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 359.1560974121094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 359.0843200683594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 358.8495788574219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 359.1544494628906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 358.9107360839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 359.04949951171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 359.34918212890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 359.328857421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 358.9447021484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 358.8392028808594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 358.7823181152344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 358.86181640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 359.05419921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 359.0610656738281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 359.0285339355469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 358.9088134765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 358.8412170410156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 359.202392578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 359.0896911621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 359.1386413574219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 358.8768005371094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 359.01483154296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 358.9523010253906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 358.89056396484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 359.01483154296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 359.02838134765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 359.0072021484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 358.9468688964844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 358.83349609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 358.8565368652344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.884033203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 358.6334228515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 358.98321533203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 359.0045166015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 359.01544189453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 359.0834045410156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 359.1231689453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 358.8591003417969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 358.9276428222656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 358.80853271484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 358.97393798828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 358.8843688964844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 358.8261413574219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 358.9639892578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 359.2319641113281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 358.86578369140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 358.8814697265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 358.9276123046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 359.08416748046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 359.2189025878906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 358.93902587890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 358.9905700683594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 358.7579040527344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 358.8492431640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 358.83953857421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 359.1061706542969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 359.10223388671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 358.8266296386719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 359.0594787597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 358.84466552734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 358.74896240234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 358.87713623046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 358.96002197265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 358.9840393066406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 359.0313415527344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 358.9792175292969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 359.00225830078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 358.9486083984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 359.1266174316406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 358.8202819824219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 358.9603271484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.7054138183594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 358.9170227050781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 358.92791748046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 358.65435791015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 358.863525390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 358.8604431152344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 358.9023742675781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.66046142578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 358.9072570800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.8968505859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 359.15069580078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 358.6900634765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 358.8287658691406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 359.02099609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 358.6852111816406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 358.8504638671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 358.7391052246094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 358.9325256347656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 359.0986022949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 358.7291564941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 358.8151550292969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 359.0741271972656
############ Running episode number: 709  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.7596130371094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 358.931640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 359.0938415527344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 358.99041748046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 358.92596435546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 358.8429870605469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 358.8770751953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 359.05230712890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 359.0577087402344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 358.713623046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 358.82623291015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 359.02508544921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 358.953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.8940124511719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 358.81573486328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 359.07916259765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 358.9967041015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 359.15966796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 358.9714660644531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 359.0075988769531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 359.004150390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 359.00604248046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 358.7016906738281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 358.9203796386719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 358.8382568359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 359.0313720703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 359.0691223144531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 358.931640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 358.7908020019531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 358.8617248535156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 358.87225341796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 358.7591857910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 358.951416015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 358.8590087890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 358.8138732910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 358.91796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 359.0436096191406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 359.0596008300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 358.7925109863281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 359.19830322265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 358.86468505859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 359.0528869628906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 358.9618835449219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 358.8196716308594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 358.9338684082031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 358.8504333496094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 359.0452880859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 358.8759765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 359.08978271484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 358.9765319824219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 358.75738525390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 359.08209228515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 359.0802917480469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 359.11773681640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 358.87713623046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 358.8531188964844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 359.1023864746094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 358.86328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 359.007568359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 359.029541015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 358.9173889160156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 358.9393615722656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 358.9120178222656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 358.977783203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 358.94427490234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 358.9727478027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 358.6327209472656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 358.9349060058594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 359.09588623046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 359.1516418457031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 358.8623962402344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 359.1214294433594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 358.81500244140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 358.8894958496094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 359.11798095703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 359.0002136230469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 358.91070556640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 358.861083984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 359.1759948730469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 358.9358215332031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 359.0879821777344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 358.93902587890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 358.7945861816406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 358.7358703613281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 359.0177001953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.99151611328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 359.1082763671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 359.2738037109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 359.10614013671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 358.8698425292969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 359.0458068847656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 358.746337890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 359.0541076660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 359.04461669921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 358.915771484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 358.9867858886719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 359.0627136230469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 358.9544982910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 359.0621643066406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 358.9393005371094
############ Running episode number: 710  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.88787841796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 358.9405822753906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 358.8852233886719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 358.85491943359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 358.9244689941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 359.0277404785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 359.0266418457031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 359.14105224609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 358.8946838378906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 358.9356384277344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 358.77294921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 358.7769775390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 358.9475402832031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.8514404296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 359.03070068359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 358.6803283691406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 358.8804016113281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 359.0444641113281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 358.8758239746094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 358.9664001464844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 358.82525634765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 358.8452453613281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 358.9352722167969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 359.2048034667969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 359.0799560546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 358.74688720703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 358.9610290527344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 358.80810546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 359.00537109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 359.1710510253906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 358.9505920410156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 359.0899963378906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 358.9023742675781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 359.1844787597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 358.66558837890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 358.93756103515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 359.04498291015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 359.09136962890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 358.924072265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 359.0060729980469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 359.0605163574219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 358.9148864746094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 358.8490295410156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 358.9099426269531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 358.9571533203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 358.9489440917969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 359.1418762207031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 358.77752685546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 358.8271789550781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 359.0994873046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 359.13494873046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 359.0507507324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 358.8415222167969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 359.0137939453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 359.07537841796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 358.8938293457031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 359.0139465332031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 358.987548828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 358.88214111328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 359.0146179199219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 358.91192626953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 359.0850830078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 359.1260986328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 358.89154052734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 358.9796447753906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 359.31427001953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 358.9403076171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 358.9592590332031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 358.872314453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 358.8788146972656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 359.02679443359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 359.1268005371094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 358.9697570800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 358.9564514160156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 359.1358642578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 358.83709716796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 358.9418029785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 358.99334716796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.818359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 358.8846740722656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 358.883056640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 358.90447998046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 358.8902587890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 358.8531799316406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 358.88189697265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.8923034667969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 359.0070495605469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.8424377441406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.9934997558594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 359.2861633300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 358.90777587890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 359.1013488769531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 358.88555908203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 358.84661865234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 359.1882019042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 359.0751953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 358.9759826660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 358.9842224121094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 358.982421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 359.15924072265625
############ Running episode number: 711  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 359.1152648925781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 358.87200927734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 358.8952331542969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 358.89263916015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 358.80560302734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 358.92919921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 358.936767578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 358.97747802734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 358.8092346191406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 359.02215576171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 359.03839111328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 359.0262756347656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 358.89691162109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.9718933105469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 359.06243896484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 359.0042419433594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 358.94268798828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 358.8511962890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 359.00836181640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 359.1064453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 359.00811767578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 358.9549560546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 358.8708190917969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 358.8521423339844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 358.8211669921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 359.0335998535156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 358.9909973144531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 358.98602294921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 358.98004150390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 358.7939147949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 358.9593505859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 359.2143859863281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 359.0141906738281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 358.86737060546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 358.9724426269531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 359.0296630859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 359.1075134277344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.8877258300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 358.803466796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 358.97491455078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 358.9284362792969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 358.8038330078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 358.9620056152344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 359.2177429199219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 359.1061096191406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 359.0129699707031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 358.92718505859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 358.8279724121094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 359.0040588378906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 359.2258605957031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 359.0432434082031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 359.03021240234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 358.818603515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 358.8134460449219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 359.3127746582031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 358.9256286621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 359.1604919433594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 359.17291259765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 359.0201416015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 359.14447021484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 359.0328063964844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 358.7596740722656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 358.9623718261719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 359.2005920410156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 358.9712829589844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 358.9394836425781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 358.9735107421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 358.995849609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 359.1318054199219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 358.986328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 359.00775146484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 358.8984680175781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 359.0486755371094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 358.9691162109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 358.85400390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 359.1225280761719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 358.9438171386719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 358.862060546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 359.1233825683594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 359.00103759765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 358.7186279296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 358.7040710449219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 358.94891357421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 359.1251525878906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 358.99493408203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.9991149902344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 358.78912353515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.81060791015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.85809326171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 358.9312438964844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 359.0559997558594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 359.003662109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 359.019775390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 358.7877502441406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 358.8113708496094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 358.9569396972656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 358.8924255371094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 358.66522216796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 358.9367370605469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 359.11865234375
############ Running episode number: 712  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.6803894042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 358.87261962890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 358.9904479980469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 359.019775390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 358.8712463378906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 358.9890441894531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 359.0845947265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 359.2531433105469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 359.0926818847656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 359.193603515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 359.0712585449219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 358.9757080078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 358.7984313964844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 359.1061706542969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 359.19329833984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 359.0719299316406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 359.09088134765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 358.9222412109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 358.7473449707031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 358.961181640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 358.9695129394531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 358.79034423828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 359.1169738769531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 358.9197998046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 358.82940673828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 359.2033386230469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 358.89910888671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 359.060302734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 359.1195983886719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 359.0320129394531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 358.7364196777344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 358.9240417480469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 358.7391662597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 359.1343688964844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 358.94366455078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 359.03875732421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 359.035400390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.9220275878906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 359.07537841796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 358.902587890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 358.9228515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 358.890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 358.9431457519531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 359.01507568359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 358.6817626953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 358.9732971191406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 358.9523620605469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 359.1035461425781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 359.1262512207031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 359.0889892578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 359.1092834472656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 358.9463806152344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 358.8092956542969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 358.90869140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 358.8017578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 359.109130859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 358.7015380859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 358.8842468261719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 359.0171203613281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 359.03076171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 359.1579895019531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 358.9477233886719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 358.915283203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 359.14208984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 359.0404357910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 358.9146423339844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 358.96875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 358.73760986328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 358.98455810546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 359.1195068359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 358.7385559082031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 358.85296630859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 358.95697021484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 358.9749755859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 359.037353515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 358.77374267578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 358.81689453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 358.965087890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.87335205078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 358.9027404785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 358.7642517089844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 359.1934814453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 358.8415832519531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 359.161376953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 359.0149230957031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.92706298828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 358.859130859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 359.02777099609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 359.0053405761719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 358.95794677734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 358.6996765136719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 359.162841796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 358.9578552246094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 359.07537841796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 358.96600341796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 359.0416564941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 359.0274963378906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 358.7727966308594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 358.8821105957031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 358.9428405761719
############ Running episode number: 713  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.8125915527344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 358.951904296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 358.8576354980469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 359.1202392578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 359.23968505859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 359.0917053222656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 359.08282470703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 358.9157409667969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 359.0664367675781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 358.94854736328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 359.0998840332031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 358.83892822265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 359.08660888671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.6862487792969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 358.8482360839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 358.981201171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 359.17803955078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 358.85882568359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 358.9738464355469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 358.977294921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 359.00762939453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 359.07647705078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 358.7738342285156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 358.9015808105469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 358.69927978515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 359.0255126953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 359.1208801269531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 358.9902648925781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 358.85833740234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 358.935546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 358.72650146484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 358.7628479003906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 358.7689208984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 358.9984130859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 358.9249572753906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 358.89208984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 358.9065856933594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 359.0412902832031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 358.8686218261719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 358.8464050292969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 359.05767822265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 358.79583740234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 359.08538818359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 359.0563659667969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 359.2257080078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 358.90521240234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 358.9449768066406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 358.998779296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 358.72662353515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 358.8860168457031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 358.88153076171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 358.9170837402344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 358.9934997558594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 359.0308837890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 358.8266906738281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 359.171142578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 359.01885986328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 359.0345153808594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 358.854248046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 358.8885803222656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 358.95159912109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 358.9173278808594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 358.82861328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 358.985595703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 358.9513854980469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 358.9889831542969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 359.0633544921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 358.8208312988281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 358.9776611328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 358.8267822265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 358.8180236816406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 359.0009765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 358.8775329589844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 358.7796325683594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 359.04071044921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 358.8919372558594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 359.03887939453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 358.976806640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 359.0945739746094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 358.9006652832031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 358.8124694824219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 358.98272705078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 358.926025390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 359.0284118652344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 359.08917236328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 359.1570129394531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 359.06549072265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.9501953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.9670715332031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 359.0631103515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 358.9584045410156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 359.0824890136719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 358.9674987792969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 359.2259216308594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 358.702880859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 359.0755920410156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 358.98419189453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 358.7780456542969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 358.9596862792969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 359.04425048828125
############ Running episode number: 714  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.8960266113281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 358.76397705078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 358.8464660644531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 358.79736328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 358.86505126953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 359.0048522949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 359.0068359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 358.8890686035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 358.9341735839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 358.9408874511719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 358.8596496582031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 359.0706787109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 359.0084533691406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 359.14239501953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 358.8320007324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 358.70037841796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 359.2207336425781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 358.9515075683594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 358.9810485839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 358.88421630859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 358.9174499511719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 358.9168701171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 358.8607482910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 359.06768798828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 358.6679992675781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 358.628662109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 359.0028381347656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 358.90106201171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 359.18548583984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 358.91363525390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 359.00628662109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 358.85882568359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 359.1225891113281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 359.0937805175781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 359.1126403808594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 358.8868713378906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 359.011962890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.86669921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 358.89495849609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 358.9910888671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 358.88568115234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 359.0495910644531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 358.89532470703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 358.76104736328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 359.0232238769531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 359.1240234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 358.802490234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 358.7964782714844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 359.0889587402344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 359.0314636230469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 358.9362487792969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 359.0150451660156
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 2 19.0 1063.96010023 (13.649658108197247, 11)
loss 358.8965148925781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 358.9485778808594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 358.861083984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 358.9541015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 358.958740234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 358.94573974609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 358.9496154785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 358.7798767089844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 358.7528991699219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 358.9920654296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 359.0155944824219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 358.9125061035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 359.0209655761719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 358.6679992675781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 359.1282958984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 359.03057861328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 359.07000732421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 358.802978515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 358.9683532714844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 359.0120544433594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 358.87322998046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 358.9388427734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 358.80548095703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 358.7577819824219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 359.0794982910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 359.0924377441406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.8648376464844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 359.21197509765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 359.2275085449219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 358.8168029785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 359.02813720703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 359.190673828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 358.9981994628906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.8841552734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 359.0347900390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.6257019042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 359.11553955078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 358.7921447753906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 358.95758056640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 359.0423278808594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 359.0675964355469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 358.7966003417969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 359.02435302734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 359.12615966796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 359.0262756347656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 359.0992736816406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 358.93524169921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 358.85577392578125
############ Running episode number: 715  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.7279052734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 358.9881286621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 359.0858154296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 358.9771728515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 358.95458984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 358.9714660644531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 359.0606689453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 359.18310546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 359.0447692871094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 358.98358154296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 358.9254150390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 358.9602966308594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 358.9073181152344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.931884765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 359.1675109863281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 359.0780029296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 358.7937316894531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 358.9871826171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 358.7344055175781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 358.5684509277344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 359.02740478515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 359.03912353515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 359.0429382324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 359.0045166015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 358.9013366699219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 358.82635498046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 359.05316162109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 359.0248718261719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 359.1420593261719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 358.7657165527344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 358.7297058105469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 359.16204833984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 359.1815185546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 359.2073059082031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 358.9739685058594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 358.72662353515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 358.6718444824219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 359.2172546386719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 358.8080749511719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 358.9423522949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 359.0116271972656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 359.0694885253906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 358.8887939453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 358.8640441894531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 359.0309753417969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 359.0927734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 358.8188171386719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 358.9775695800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 358.8680114746094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 358.8442687988281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 359.0001220703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 358.9011535644531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 358.8663330078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 358.9274597167969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 358.7347106933594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 358.96185302734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 359.02874755859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 358.94671630859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 358.9745788574219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 358.9486999511719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 359.0865478515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 358.9444885253906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 358.8003234863281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 359.2088928222656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 358.8929138183594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 358.9942626953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 358.6362609863281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 358.700439453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 358.72039794921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 359.076416015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 358.92779541015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 359.1588439941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 359.1132507324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 358.7731018066406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 359.0027770996094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 358.9403991699219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 358.97735595703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 358.9764099121094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.8108825683594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 359.09735107421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 358.7315979003906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 358.7622985839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 359.10394287109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 359.1239013671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 359.3293762207031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.9104309082031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 358.88470458984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.95166015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.9262390136719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 358.89361572265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 358.9974670410156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 358.8587646484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 358.90142822265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 359.1301574707031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 359.1991882324219
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 0 21.0 1229.17115431 (16.667936385136993, 9)
loss 358.91583251953125
Current State,action,reward,Response time,Next State:  (9, 16.667936385136993) 3 20.0 1295.6017535 (16.836383524612351, 10)
loss 358.9783630371094
Current State,action,reward,Response time,Next State:  (10, 16.836383524612351) 3 19.0 1270.73108663 (16.845818065953559, 11)
loss 358.9425354003906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 358.7103271484375
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 0 21.0 1269.21706044 (17.215992726625572, 9)
loss 359.146728515625
############ Running episode number: 716  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.8439636230469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 359.5857238769531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 358.7862854003906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 359.0487976074219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 358.9045715332031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 359.0680847167969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 358.9770202636719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 358.7828063964844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 359.38507080078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 358.9537353515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 359.109619140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 359.0346374511719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 358.9491882324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 359.0106201171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 359.0130615234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 358.8695373535156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 359.01519775390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 358.8694152832031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 359.8143615722656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 359.2835693359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 358.8017883300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 358.8705139160156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 359.43365478515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 358.90997314453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 358.9271240234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 359.9010009765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 358.5724792480469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 359.207763671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 359.535888671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 358.7624206542969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 358.92144775390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 359.8686218261719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 358.963134765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 359.1026306152344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 359.0188903808594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 358.8843078613281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 359.3903503417969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 359.2139587402344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 359.4523620605469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 359.8381652832031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 359.0722351074219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 359.2709655761719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 359.8638610839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 358.70941162109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 358.77288818359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 358.8822937011719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 358.85357666015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 358.9822082519531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 359.03668212890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 359.95819091796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 359.0974426269531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 359.22955322265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 359.0867614746094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 358.8901062011719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 359.1882019042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 359.6390380859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 359.7925720214844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 359.0566711425781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 358.90594482421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 359.0242614746094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 359.85198974609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 359.2117004394531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 359.8001708984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 359.0537414550781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 358.9876403808594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 358.6817321777344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 359.0869140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 359.08563232421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 358.77154541015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 358.7217712402344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 359.2143859863281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 358.7900390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 358.7683410644531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 359.02496337890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 358.9851379394531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 358.9162902832031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 359.2463073730469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 359.0234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 359.012451171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 359.1219482421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 359.0811767578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 358.80487060546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 360.78277587890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 358.9899597167969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 359.0387878417969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.8670349121094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 358.7406005859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.996337890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.8559875488281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 358.81085205078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 359.14239501953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 359.0308837890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 359.92547607421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 359.6958312988281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 358.8464050292969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 359.58990478515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 359.0886535644531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 358.66387939453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 358.8479309082031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 360.01666259765625
############ Running episode number: 717  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.84014892578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 359.7290954589844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 359.02606201171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 359.83929443359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 359.70849609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 359.7350769042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 359.32989501953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 359.1221923828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 358.9350280761719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 358.8724365234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 358.9339599609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 358.9358825683594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 359.06787109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.9922790527344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 359.583740234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 358.7364807128906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 358.7669982910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 359.9365234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 359.77593994140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 359.7864685058594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 358.9626159667969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 358.65374755859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 359.930419921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 359.145751953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 359.0149230957031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 359.7501220703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 359.0040283203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 358.7586364746094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 359.06829833984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 359.5423278808594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 359.1338806152344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 359.00732421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 358.8371887207031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 358.8909912109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 359.54046630859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 359.8892517089844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 359.08062744140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.9732360839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 358.8134460449219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 358.9978942871094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 359.02056884765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 358.74078369140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 358.9726257324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 359.0472412109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 358.9888916015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 359.0600891113281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 359.13995361328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 358.9956970214844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 359.0498962402344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 358.9893493652344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 359.0662841796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 359.0140075683594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 359.2757568359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 359.22747802734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 358.82073974609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 358.898193359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 359.1443176269531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 359.0050048828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 358.87725830078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 358.9005432128906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 358.8500671386719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 358.7735900878906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 359.629638671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 358.97552490234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 359.1109313964844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 358.8746643066406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 358.8946228027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 358.8379211425781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 359.1006774902344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 359.7815246582031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 358.8236999511719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 358.88848876953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 359.5348815917969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 359.1358947753906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 359.0640563964844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 358.876220703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 358.9572448730469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 358.84027099609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.6872253417969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 358.8704528808594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 358.963134765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 358.9682922363281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 358.9958190917969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 358.86566162109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 359.7438659667969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 359.32525634765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 358.7864990234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.7699890136719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 360.106201171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 360.6822204589844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 359.0680847167969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 359.13055419921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 359.9485778808594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 360.4089050292969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 358.9588317871094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 358.91546630859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 358.9858703613281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 360.6177978515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 359.03936767578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 358.7845458984375
############ Running episode number: 718  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 360.0093688964844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 358.8326110839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 358.9736328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 358.8177490234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 358.89495849609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 359.2269592285156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 359.7735290527344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 359.3126525878906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 359.86468505859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 358.81060791015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 358.93902587890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 360.32366943359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 358.97784423828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 359.77191162109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 359.0694274902344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 358.8255310058594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 359.81451416015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 359.24169921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 358.8035583496094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 358.9656982421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 359.6880798339844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 360.00885009765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 358.881103515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 359.0741271972656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 359.0078430175781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 358.9785461425781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 359.7744140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 358.96087646484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 358.87188720703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 359.9757385253906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 358.86614990234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 359.0809326171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 358.9493103027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 358.8890380859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 359.0226135253906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 359.00762939453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 359.2376708984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.9029846191406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 359.2977294921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 358.9971923828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 359.1583557128906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 359.1372985839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 359.1732482910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 358.7172546386719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 359.91424560546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 359.6327819824219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 359.0365295410156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 359.33978271484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 358.7702941894531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 358.9662780761719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 358.8126525878906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 359.1700134277344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 358.7923889160156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 359.9267578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 358.83551025390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 358.9773254394531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 358.7869873046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 359.3307189941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 358.8706970214844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 359.7693176269531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 358.9043273925781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 359.8207702636719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 358.9313659667969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 358.96490478515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 359.1614074707031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 358.9970703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 359.107666015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 358.82598876953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 358.9804992675781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 359.44989013671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 358.83319091796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 358.84149169921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 359.2366027832031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 359.0550842285156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 359.04559326171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 358.8681640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 358.9640197753906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 358.99993896484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 359.8357238769531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 358.9755554199219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 358.9826354980469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 358.9189758300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 358.9309387207031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 359.0163879394531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 358.759521484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 359.8952331542969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 358.9266357421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 359.05474853515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 359.0080261230469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 358.8634033203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 359.9945068359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 359.1954650878906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 359.675048828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 358.8837890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 359.71697998046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 358.7044677734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 359.1234436035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 359.0419006347656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 360.0153503417969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 358.9996032714844
############ Running episode number: 719  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.6849060058594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 358.8088684082031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 359.5626525878906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 358.9508361816406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 358.9825744628906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 359.0456237792969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 359.1324157714844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 358.9990234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 359.0254821777344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 359.34112548828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 359.0169982910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 359.0210876464844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 358.9243469238281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.84698486328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 359.86883544921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 359.0261535644531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 359.372314453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 358.8298645019531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 359.0285339355469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 359.03375244140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 359.2111511230469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 359.0208435058594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 359.0441589355469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 358.99505615234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 359.0212097167969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 359.04736328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 359.3592834472656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 358.8033752441406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 359.9087829589844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 358.8368225097656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 359.0135498046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 358.94805908203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 359.2076416015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 359.0956115722656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 359.0028076171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 358.9658203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 358.9451904296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 359.6705017089844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 358.73455810546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 358.7638854980469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 358.8861083984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 359.7866516113281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 359.8153381347656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 358.8572082519531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 359.8236083984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 358.8448791503906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 358.9571838378906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 358.93548583984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 359.0492858886719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 358.97412109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 359.92816162109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 358.8729248046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 359.10040283203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 360.22625732421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 358.92474365234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 358.878662109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 359.4941711425781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 358.9059143066406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 359.82293701171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 358.9036560058594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 359.70587158203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 358.8036804199219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 359.243896484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 358.80596923828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 359.0438232421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 358.9913330078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 358.9020080566406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 358.9752502441406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 358.9476623535156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 358.7999572753906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 358.8511047363281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 358.9584045410156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 359.13409423828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 359.1889953613281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 358.80096435546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 359.1134033203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 359.9041748046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 358.8471374511719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.81658935546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 359.8213806152344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 358.6899108886719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 358.90460205078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 358.7635192871094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 358.833740234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 358.8731994628906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.9818115234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 359.9674072265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.9096984863281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.954345703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 359.05242919921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 359.268310546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 359.19140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 359.1007995605469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 359.0234680175781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 359.06390380859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 358.736328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 359.088134765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 358.9671325683594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 358.87030029296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 358.968505859375
############ Running episode number: 720  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 359.0714416503906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 358.8213806152344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 358.8466796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 358.7923278808594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 359.5924377441406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 358.9295654296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 358.8075866699219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 358.82733154296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 358.7702331542969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 358.8442687988281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 358.927734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 359.78094482421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 359.0428771972656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.8514404296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 359.0259094238281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 358.942626953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 359.0465393066406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 358.8654479980469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 358.86492919921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 359.02227783203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 359.1510009765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 358.64715576171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 359.82550048828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 359.611083984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 358.9765319824219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 358.9195251464844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 359.8002014160156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 358.7894592285156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 358.92962646484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 359.9087829589844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 358.81182861328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 358.83099365234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 358.86712646484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 359.0579528808594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 358.8722229003906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 359.3934326171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 359.09515380859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.96282958984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 358.8443908691406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 359.2377014160156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 358.9873046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 359.7270812988281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 358.715576171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 359.11199951171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 359.1528625488281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 358.9802551269531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 359.1501770019531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 358.9722900390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 359.70965576171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 358.9339599609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 359.8387756347656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 358.9180908203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 359.2257080078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 359.70068359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 359.7276611328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 359.03155517578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 358.9232482910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 358.7118225097656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 358.8818359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 360.53741455078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 359.6685485839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 358.95465087890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 359.1340637207031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 358.91497802734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 359.09588623046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 359.7733459472656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 359.0371398925781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 359.04290771484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 358.7309875488281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 359.1467590332031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 359.11328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 358.9789733886719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 359.1678161621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 358.93060302734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 359.0143737792969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 358.959716796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 359.02728271484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 360.230224609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.9845275878906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 359.0889587402344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 358.8399963378906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 359.0487060546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 359.8916320800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 358.9039611816406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 359.1052551269531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 359.00823974609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 359.17626953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.7033996582031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 359.0820617675781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 359.0661926269531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 358.9465026855469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 358.89263916015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 359.69207763671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 358.9351501464844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 358.93194580078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 359.2393798828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 359.30682373046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 359.8260803222656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 358.927734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 359.4573669433594
############ Running episode number: 721  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 359.0238037109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 359.0367126464844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 359.0248107910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 358.9094543457031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 358.7238464355469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 358.8913269042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 358.83514404296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 358.889892578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 359.9433898925781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 358.93035888671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 359.04901123046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 359.1709289550781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 358.9283752441406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.9818420410156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 358.7578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 359.9029235839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 358.9192810058594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 359.05645751953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 359.0443115234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 359.70184326171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 359.90667724609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 359.8838806152344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 359.0101318359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 359.61285400390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 358.72705078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 359.7003479003906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 358.8529052734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 359.9208068847656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 358.9549560546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 359.9446105957031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 359.65673828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 359.331298828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 358.813720703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 359.06781005859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 358.9207763671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 359.87371826171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 360.1885681152344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.7519836425781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 358.9075927734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 358.94024658203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 358.9756164550781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 359.08355712890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 359.8332824707031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 359.0418395996094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 359.8866882324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 358.95355224609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 359.100341796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 358.9466247558594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 358.9532165527344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 358.9878845214844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 359.4209289550781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 359.1596374511719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 358.9614562988281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 358.77642822265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 359.18670654296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 358.849609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 358.918212890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 359.7244873046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 358.901611328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 358.8661804199219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 359.0993957519531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 359.2718200683594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 358.843017578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 359.0574645996094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 358.88250732421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 360.1018371582031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 359.00799560546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 358.8475036621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 358.970458984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 359.16741943359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 358.87994384765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 359.74639892578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 359.820068359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 360.505615234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 359.4438171386719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 358.9347839355469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 358.792724609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 358.9596252441406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 359.0775451660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 359.7132263183594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 359.084228515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 358.90008544921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 358.9140930175781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 358.82269287109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 358.9106750488281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 359.0086364746094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 358.83221435546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 359.2383117675781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.860107421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 358.8246154785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 359.0307312011719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 359.0325927734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 358.9527282714844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 359.9359436035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 358.9845886230469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 358.7882385253906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 358.8786315917969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 358.67913818359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 358.71185302734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 359.11065673828125
############ Running episode number: 722  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 359.37109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 359.898681640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 358.94384765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 359.16748046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 358.80511474609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 358.9827575683594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 358.8031311035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 358.78985595703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 360.3717956542969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 359.6231994628906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 360.25848388671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 358.9544982910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 359.017822265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 359.7868957519531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 359.1375732421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 358.9879455566406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 359.00518798828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 358.95318603515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 359.164306640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 358.9481201171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 359.12066650390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 359.88385009765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 358.9172058105469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 358.7034912109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 358.9232177734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 359.3939208984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 359.0852966308594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 358.8915710449219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 359.0090637207031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 358.8468017578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 359.0547790527344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 359.12786865234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 359.89794921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 359.0433349609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 360.7748718261719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 358.58856201171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 360.2777404785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.75836181640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 358.9624938964844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 358.9543762207031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 359.88653564453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 358.8542175292969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 358.8463134765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 360.0430908203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 358.8522644042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 358.942138671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 359.15283203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 359.98443603515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 358.97149658203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 358.863525390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 360.5997314453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 359.01715087890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 359.11737060546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 358.8887634277344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 359.5807800292969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 358.95489501953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 358.8990783691406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 358.9041442871094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 359.2825012207031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 359.7858581542969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 358.8768615722656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 358.914306640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 360.2934875488281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 358.6832275390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 358.937255859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 359.008544921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 358.9595947265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 358.8438415527344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 359.1557922363281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 359.8135070800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 358.85906982421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 358.7239074707031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 359.0319519042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 359.8287048339844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 359.0518798828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 358.91815185546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 358.8124084472656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 358.7638854980469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 359.05462646484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 359.6712646484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 358.93170166015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 359.0048522949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 358.89697265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 359.0362243652344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 358.70001220703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 359.26641845703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 358.7358703613281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.9456481933594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.8097229003906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 358.9129943847656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 358.9491271972656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 359.1640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 358.77337646484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 358.90289306640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 359.1560363769531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 359.6583557128906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 358.6985778808594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 358.7765808105469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 359.8214111328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 360.3373107910156
############ Running episode number: 723  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.96978759765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 359.0
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 359.80328369140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 358.91253662109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 359.12176513671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 359.03790283203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 358.8441162109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 359.04437255859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 358.8570556640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 359.1952209472656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 358.8114318847656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 358.68426513671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 359.13848876953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 359.0781555175781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 358.937255859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 359.0849304199219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 358.9656066894531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 358.86090087890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 358.88519287109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 358.9746398925781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 360.050048828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 359.1134033203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 358.91497802734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 358.846923828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 359.7937927246094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 359.09161376953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 359.0419006347656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 358.9072570800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 359.8630676269531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 359.1355895996094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 359.0063781738281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 359.1364440917969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 358.8653564453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 359.1882629394531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 358.836181640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 359.09906005859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 359.00762939453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.9131774902344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 358.88232421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 359.3402099609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 359.9878234863281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 359.0545654296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 358.955078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 359.00146484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 359.0176086425781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 358.683837890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 358.8643493652344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 359.0133361816406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 358.8682861328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 358.8863830566406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 359.04815673828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 358.8893127441406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 359.7359313964844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 358.9931335449219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 359.4232177734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 358.8621826171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 359.2681579589844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 359.04107666015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 360.2619934082031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 358.98236083984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 359.9404296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 358.7412109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 358.97821044921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 358.8967590332031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 358.9100646972656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 359.3362121582031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 358.8529968261719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 359.1717834472656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 359.7651062011719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 359.91839599609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 359.15350341796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 359.25628662109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 358.77142333984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 359.64117431640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 358.7111511230469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 359.1392517089844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 358.9686584472656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 358.8561096191406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.9983215332031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 358.7970275878906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 358.84991455078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 359.0683898925781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 358.95635986328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 359.77099609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 359.092041015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 359.8404541015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 358.9539489746094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 359.595703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.84124755859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 359.14605712890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 359.0702209472656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 358.96630859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 359.0360107421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 359.0959167480469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 358.9843444824219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 358.7695617675781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 358.961181640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 358.8265075683594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 358.7676696777344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 359.0581970214844
############ Running episode number: 724  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.98040771484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 358.7702941894531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 358.95391845703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 359.88671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 358.867431640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 359.07135009765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 359.0286865234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 359.01409912109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 359.775390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 358.94024658203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 359.1376647949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 359.01226806640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 359.1189880371094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 359.6526794433594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 359.0478210449219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 358.8006591796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 359.1140441894531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 359.8092956542969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 358.93133544921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 359.2842102050781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 358.97235107421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 358.8964538574219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 359.0819396972656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 359.0951232910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 359.05682373046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 358.9092102050781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 358.9542541503906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 358.9148254394531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 359.0520935058594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 358.8552551269531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 359.0439758300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 359.8544006347656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 358.8820495605469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 358.8417053222656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 359.258544921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 359.0919494628906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 359.7775573730469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.88739013671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 358.9060974121094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 358.92755126953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 358.9884338378906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 359.255615234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 359.02056884765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 359.0737609863281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 358.5838317871094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 358.90875244140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 360.44183349609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 358.971435546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 359.8269348144531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 358.93798828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 359.29052734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 359.0193176269531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 359.9300537109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 360.7440490722656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 358.804443359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 359.8522033691406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 359.8102111816406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 358.8206787109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 359.0340270996094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 359.0167236328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 359.2296142578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 358.8947448730469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 358.93505859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 359.29229736328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 359.0459899902344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 359.8602294921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 358.8695983886719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 359.163330078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 358.9744873046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 359.0973815917969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 359.071533203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 358.8505859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 359.08502197265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 358.8014221191406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 358.74725341796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 359.0292053222656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 359.0252380371094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 359.75311279296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.8676452636719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 358.92681884765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 359.0163269042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 359.797119140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 359.5349426269531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 358.8528747558594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 359.0424499511719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 359.13629150390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 360.07415771484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 359.0386047363281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 359.87054443359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 359.0966491699219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 359.3284912109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 358.96044921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 360.27166748046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 359.0736389160156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 359.6422424316406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 359.69927978515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 359.0549011230469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 358.7563781738281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 359.01348876953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 359.0479736328125
############ Running episode number: 725  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 359.6510925292969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 359.7904052734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 358.861328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 358.9820556640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 359.8861389160156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 358.92022705078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 358.8541564941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 358.8506774902344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 358.8528137207031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 359.7366638183594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 360.2801208496094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 358.9917297363281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 358.9383239746094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 359.0636291503906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 359.1411437988281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 358.90521240234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 359.0588684082031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 359.86590576171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 358.7351989746094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 359.9023132324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 359.7823486328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 358.8026123046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 358.8317565917969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 358.8811340332031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 360.2936096191406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 358.99102783203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 358.98590087890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 358.9049987792969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 358.97369384765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 358.9363708496094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 358.8604431152344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 358.89276123046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 358.66912841796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 358.92742919921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 358.94488525390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 358.9781188964844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 358.90985107421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 359.0046081542969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 359.9321594238281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 358.9823913574219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 359.0123291015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 358.8994140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 359.15283203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 358.9047546386719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 359.0452880859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 360.13427734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 358.9074401855469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 359.6335754394531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 359.7253723144531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 359.0695495605469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 358.74371337890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 358.8199768066406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 359.69207763671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 358.88006591796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 359.09771728515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 358.88323974609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 359.2422180175781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 358.9126892089844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 359.0442199707031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 359.05841064453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 359.6535339355469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 359.3230895996094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 358.8945007324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 359.7859802246094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 358.9200744628906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 358.9018249511719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 359.09423828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 359.072509765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 358.8011169433594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 358.8667907714844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 359.0401611328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 359.0049133300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 359.1633605957031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 358.96734619140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 358.9381103515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 358.8475036621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 358.9449768066406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 359.13519287109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 359.81597900390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 358.7623291015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 358.9912414550781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 359.0643005371094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 358.99310302734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 360.87469482421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 358.954345703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 359.158447265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 359.0078430175781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.9049987792969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.9700927734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 359.224365234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 359.4681701660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 358.917236328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 359.7773132324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 360.68194580078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 358.7625427246094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 358.94940185546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 358.9090270996094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 358.8860778808594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 358.76177978515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 358.9742736816406
############ Running episode number: 726  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.9693298339844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 358.9720764160156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 359.0747985839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 358.94952392578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 358.6289978027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 358.8555603027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 359.0247802734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 358.91082763671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 359.1483459472656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 358.9085693359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 359.0672912597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 358.9737243652344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 358.9256591796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 359.0253601074219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 358.7209777832031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 358.9671936035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 358.9381408691406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 358.9671936035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 359.0477294921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 359.0974426269531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 359.10791015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 358.80047607421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 358.8679504394531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 359.2262268066406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 358.9178466796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 358.78253173828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 359.0437927246094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 358.82916259765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 359.16790771484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 359.016845703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 358.75177001953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 359.1487121582031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 359.1158752441406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 358.9222106933594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 358.722900390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 358.7647399902344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 359.0748596191406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.8511047363281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 359.0398864746094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 358.817138671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 359.0447998046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 359.07025146484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 359.035400390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 359.045654296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 359.0272216796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 359.02105712890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 359.1419982910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 358.814453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 358.9739685058594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 358.9464111328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 358.99676513671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 358.8210754394531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 358.8602294921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 358.9977722167969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 359.08026123046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 358.7525939941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 358.9772033691406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 358.84375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 358.87860107421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 358.9193115234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 358.8246154785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 359.0279541015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 358.89080810546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 358.8746643066406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 359.0453186035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 358.8689880371094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 358.808349609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 358.6712646484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 358.8974304199219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 358.8507995605469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 358.9120788574219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 358.9740905761719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 358.8923645019531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 359.1756591796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 358.8658142089844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 358.9263916015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 359.053955078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 358.9181213378906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.9320983886719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 358.9744873046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 359.0489196777344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 359.1948547363281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 359.0155029296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 359.1136169433594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 359.1277160644531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.7604675292969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 359.15777587890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.9493408203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 359.1222229003906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 358.7830505371094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 358.8345642089844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 358.9700927734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 359.1061706542969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 358.9332275390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 359.07098388671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 358.7478332519531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 358.8370056152344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 358.99591064453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 358.9710388183594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 358.99468994140625
############ Running episode number: 727  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 359.0089111328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 358.95880126953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 359.11383056640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 358.7274169921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 358.9567565917969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 358.9940185546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 358.9292297363281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 359.03118896484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 358.7665710449219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 359.0189514160156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 358.9083557128906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 358.88592529296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 359.0442199707031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.57806396484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 358.8929138183594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 358.8370666503906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 358.92913818359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 359.1153564453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 359.0733337402344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 359.21820068359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 358.78265380859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 359.1092529296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 358.8482360839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 358.4510803222656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 358.96734619140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 358.8890380859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 358.9403381347656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 358.6969299316406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 358.8751525878906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 358.9219055175781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 359.067138671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 359.1142578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 358.99542236328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 358.9497985839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 358.9957275390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 359.10333251953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 358.75787353515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 359.04962158203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 359.00555419921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 358.8528137207031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 358.883544921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 359.0483093261719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 359.20703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 358.8832092285156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 359.10784912109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 358.9131774902344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 358.966796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 358.9164733886719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 358.87408447265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 358.94903564453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 358.98858642578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 359.0496826171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 358.91888427734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 358.9247131347656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 358.9548645019531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 359.0802307128906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 358.9332580566406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 358.8401794433594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 358.8902587890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 359.0919494628906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 359.00732421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 358.8022766113281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 358.7914733886719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 358.93719482421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 358.909912109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 358.9420471191406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 359.03863525390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 358.9969177246094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 359.0144348144531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 359.0267639160156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 358.8041076660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 359.0595397949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 359.0474548339844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 358.9436950683594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 358.8829650878906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 359.0230407714844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 359.0350036621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 358.9962463378906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 359.0156555175781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 359.02557373046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 358.8662414550781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 358.9956970214844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 359.0885009765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 359.0563659667969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 358.7401123046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 359.1534423828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 359.0303955078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.88409423828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.91033935546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 358.9626159667969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 358.90179443359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 358.67657470703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 359.2005310058594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 358.9784851074219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 358.8509826660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 358.8558654785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 358.94696044921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 358.7857360839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 358.6147155761719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 358.95379638671875
############ Running episode number: 728  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.9964294433594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 358.8666076660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 358.92437744140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 358.8829650878906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 358.8068542480469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 358.76556396484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 358.8982849121094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 358.9637451171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 359.2357482910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 358.8996276855469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 359.03753662109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 358.7268371582031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 358.9403076171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.9288330078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 359.2991027832031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 359.08721923828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 358.783447265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 359.1535949707031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 359.17828369140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 359.0447082519531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 358.972900390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 358.92523193359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 359.0712585449219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 359.0299987792969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 358.9404296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 358.8723449707031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 359.1101989746094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 358.9373474121094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 359.0847473144531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 358.8558044433594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 359.18865966796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 358.9993591308594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 358.94464111328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 358.9286193847656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 358.9687194824219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 358.78955078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 358.84088134765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.9110412597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 359.0023498535156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 359.11517333984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 358.813720703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 358.84991455078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 358.8187255859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 358.9922180175781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 358.98046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 359.0807800292969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 359.12176513671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 358.860107421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 358.7509460449219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 359.04290771484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 359.1710205078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 359.08001708984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 359.0619201660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 358.9070129394531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 358.6304016113281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 358.72027587890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 359.0556335449219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 358.8949890136719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 358.7222595214844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 358.86614990234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 358.986083984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 359.0650939941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 358.9423828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 358.6273498535156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 359.11083984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 358.92535400390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 358.90399169921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 359.17083740234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 358.87384033203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 359.1513977050781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 358.91448974609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 358.9830627441406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 358.6982116699219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 358.8482971191406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 358.8482666015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 358.8530578613281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 358.878173828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 358.97503662109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.9683837890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 359.0384216308594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 358.9914245605469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 359.2955627441406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 358.9036865234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 358.8192443847656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 359.155517578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 359.02252197265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 359.1663818359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.85211181640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.9721984863281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 359.0667419433594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 359.15179443359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 358.81866455078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 358.8443603515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 359.0235595703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 359.08660888671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 358.93756103515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 359.04541015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 358.9953308105469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 358.90533447265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 359.04046630859375
############ Running episode number: 729  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.860595703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 359.0038146972656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 359.311767578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 359.0633850097656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 358.99591064453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 358.8000793457031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 359.15386962890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 358.795654296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 358.8141784667969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 358.9341735839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 358.9619140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 358.972900390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 359.2301940917969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.9891357421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 358.8465881347656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 358.74151611328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 358.871337890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 359.03326416015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 359.1061096191406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 358.88018798828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 359.0249938964844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 358.8597106933594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 358.9092712402344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 359.013916015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 359.0901184082031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 358.9721374511719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 358.6811218261719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 359.06756591796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 359.07232666015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 359.222412109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 358.71636962890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 358.9185791015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 358.99456787109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 359.1776123046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 358.72637939453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 358.9101257324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 358.83270263671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 359.0591735839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 359.16607666015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 359.0247497558594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 358.7955627441406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 358.6487731933594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 358.8515319824219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 359.0572814941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 358.89605712890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 359.0065612792969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 358.7940368652344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 358.92803955078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 359.2419738769531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 358.9599304199219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 359.1295471191406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 358.95751953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 358.84405517578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 358.86541748046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 358.8875427246094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 358.93707275390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 359.2065124511719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 359.0008850097656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 358.9437561035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 358.9497985839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 359.0871887207031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 359.040283203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 359.0419616699219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 359.0363464355469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 359.1441650390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 359.04046630859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 358.9688720703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 358.8534240722656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 359.091796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 358.9479064941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 358.75048828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 359.039794921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 358.9477233886719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 359.09625244140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 359.10028076171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 358.8401794433594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 358.9117431640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 358.9272766113281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.9952087402344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 359.0422058105469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 358.8836669921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 359.154052734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 358.9877624511719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 359.0184020996094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 358.8398132324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.8817443847656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 359.1877746582031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.8603820800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.73321533203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 358.7605895996094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 358.9010009765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 358.8642272949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 358.9710693359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 358.9064636230469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 358.97808837890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 359.0647888183594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 358.9525451660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 358.8297119140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 358.9825134277344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 359.0050048828125
############ Running episode number: 730  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 359.0102844238281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 358.98095703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 359.0151062011719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 358.9623107910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 359.070556640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 359.0744323730469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 358.92626953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 358.78961181640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 359.1136169433594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 359.01080322265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 358.89764404296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 358.90899658203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 358.681640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 359.2314147949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 358.75390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 358.9585876464844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 359.0573425292969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 359.1534729003906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 358.8493347167969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 358.8541564941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 358.7496337890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 358.5816650390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 358.9162902832031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 358.90777587890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 358.8052062988281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 358.8144226074219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 359.2176818847656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 359.07684326171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 358.9247131347656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 358.8366394042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 358.8835144042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 358.8697204589844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 358.9307556152344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 358.8768005371094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 358.8360290527344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 358.9429931640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 359.0971374511719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 359.10791015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 359.0365295410156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 359.05157470703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 358.98126220703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 359.140380859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 358.9433288574219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 358.85723876953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 358.9703674316406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 359.0218811035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 358.8197021484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 358.9483337402344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 358.903076171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 358.91571044921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 359.1761474609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 358.79339599609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 359.02349853515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 358.92236328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 358.9686584472656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 358.967529296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 358.88995361328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 359.0501708984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 358.88958740234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 359.0789794921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 358.7130432128906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 358.8377380371094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 359.1093444824219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 359.0044250488281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 358.9657287597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 359.0143737792969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 359.05621337890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 358.92559814453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 359.19873046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 359.08758544921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 359.0392761230469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 359.003662109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 358.8262634277344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 358.94476318359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 358.7767028808594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 358.7546691894531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 359.1239013671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 358.9028625488281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.8985595703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 359.07293701171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 358.8375244140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 358.8397521972656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 358.9068603515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 358.9476623535156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 358.9598388671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 359.0265808105469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 359.1493225097656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.8099060058594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.8723449707031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 358.8910827636719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 359.155517578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 358.9716796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 358.889892578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 358.8399963378906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 358.93548583984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 359.07183837890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 358.9766845703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 359.0088806152344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 358.720703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 358.6965026855469
############ Running episode number: 731  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.7555236816406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 359.0140380859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 358.7720031738281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 358.9082946777344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 358.7777404785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 358.88677978515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 358.8230895996094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 359.0640563964844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 359.1416320800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 359.1451110839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 359.0638732910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 358.998779296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 358.8949890136719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.8667907714844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 358.8691711425781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 358.9691162109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 358.91845703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 358.83514404296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 359.09893798828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 358.8876953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 359.1380310058594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 359.149658203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 358.8129577636719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 358.9678039550781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 359.0890808105469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 358.8299560546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 358.96075439453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 358.84539794921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 358.8337097167969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 359.05474853515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 358.9923095703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 358.6958312988281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 358.9449462890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 358.736328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 358.7409973144531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 358.98291015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 358.9508056640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.98834228515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 359.01348876953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 358.9703674316406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 359.0961608886719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 358.9979248046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 358.9287414550781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 358.9835205078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 359.1361389160156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 359.01971435546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 358.9206848144531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 358.81964111328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 358.90692138671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 359.0572509765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 358.880859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 359.1228332519531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 358.95574951171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 358.8605651855469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 358.9687805175781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 358.93768310546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 359.0711669921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 358.95147705078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 358.7123718261719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 359.109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 359.0305480957031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 359.0765380859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 358.89501953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 358.9795227050781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 358.5886535644531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 359.07440185546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 358.8468017578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 359.09063720703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 358.9721984863281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 358.916259765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 358.9610290527344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 359.26702880859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 358.9469299316406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 359.03118896484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 358.9081726074219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 358.7228088378906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 358.9677429199219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 358.8338623046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 359.2474060058594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 358.97100830078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 359.01751708984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 358.9845275878906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 358.9730529785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 359.0581970214844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 358.87518310546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 359.0988464355469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 358.9330749511719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 359.062744140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 359.1097106933594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 358.9425354003906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 358.97735595703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 359.13525390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 358.8599853515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 358.9070129394531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 358.84710693359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 358.87249755859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 359.04119873046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 358.75006103515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 359.1260986328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 358.84356689453125
############ Running episode number: 732  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.9009094238281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 359.0460205078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 358.90020751953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 359.0106201171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 358.89813232421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 359.1134338378906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 358.84613037109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 358.8705139160156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 358.8728942871094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 359.0557861328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 358.7122497558594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 359.11376953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 358.9353942871094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 359.2248229980469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 358.95703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 359.1236572265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 358.9286804199219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 358.997802734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 359.0650634765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 359.0377502441406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 358.7892150878906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 359.12823486328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 359.02520751953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 358.99273681640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 359.0380859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 358.9364929199219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 359.0618591308594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 358.9212951660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 358.9885559082031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 359.20037841796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 358.7860412597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 359.00372314453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 358.8846740722656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 359.02740478515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 359.2091064453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 358.9535217285156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 359.072265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.930419921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 359.09942626953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 358.79547119140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 358.9659118652344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 359.07952880859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 358.8213195800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 358.9166564941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 358.983154296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 359.1202392578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 358.9438171386719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 359.1170349121094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 358.9060363769531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 359.0716857910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 358.93798828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 358.79815673828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 358.791015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 358.86865234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 359.1071472167969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 359.09515380859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 359.0059814453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 358.9359130859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 358.9922180175781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 358.9714050292969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 358.9888916015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 358.9597473144531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 359.1702880859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 358.81475830078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 359.1888732910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 359.0743408203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 358.9155578613281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 358.89007568359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 358.8750915527344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 358.9532775878906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 358.6867370605469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 358.7473449707031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 359.0923767089844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 358.6672058105469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 359.068603515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 358.9322204589844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 358.7063293457031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 358.9387512207031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 359.08868408203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 358.8018493652344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 359.1653747558594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 358.85723876953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 358.9325256347656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 358.9393310546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 359.0865783691406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 359.2311706542969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 358.9713134765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 359.042236328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.96783447265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 358.89947509765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 358.9792785644531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 359.10345458984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 358.7699279785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 359.0388488769531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 358.9401550292969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 358.79461669921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 358.7166442871094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 358.9703674316406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 358.9194030761719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 358.8829650878906
############ Running episode number: 733  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.9601745605469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 359.10302734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 358.8393859863281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 358.96435546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 358.80364990234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 359.0875244140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 359.1253356933594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 358.971923828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 358.7824401855469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 359.0406494140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 358.81317138671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 358.88836669921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 359.0345153808594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 359.01239013671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 358.8993835449219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 358.9156799316406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 358.7467041015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 358.9394226074219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 358.98223876953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 359.2294616699219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 359.0390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 359.1429138183594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 359.01336669921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 358.9112854003906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 359.09991455078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 358.990234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 359.04278564453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 359.0249328613281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 358.9730529785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 358.91162109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 359.1280822753906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 359.1805114746094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 358.84014892578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 358.8927917480469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 358.7630920410156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 359.0364074707031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 359.02587890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 359.06536865234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 359.0498352050781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 359.05419921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 358.9742736816406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 359.1271667480469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 359.03070068359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 358.69793701171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 358.9588317871094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 358.9777526855469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 358.9870300292969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 359.06689453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 359.07635498046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 359.047607421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 358.9889221191406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 358.9625549316406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 358.7911682128906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 358.72125244140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 358.9623107910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 359.1363220214844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 358.9212646484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 358.8822326660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 358.9490966796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 359.0880126953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 358.78741455078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 359.1510925292969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 358.89349365234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 358.96490478515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 358.64007568359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 358.970458984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 359.0687561035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 358.7812805175781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 359.0808410644531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 358.8531188964844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 358.8687744140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 358.9206848144531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 359.11187744140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 359.07171630859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 358.9726867675781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 359.0578918457031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 359.02691650390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 358.8629150390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 359.0022277832031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 358.9738464355469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 358.9967346191406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 358.7864990234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 358.8270263671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 358.9441223144531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 358.83660888671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 359.0194091796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 358.6159362792969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 359.0132141113281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.9864196777344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 359.0860900878906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 358.9141845703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 359.0755310058594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 359.4046936035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 359.0756530761719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 358.9988098144531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 358.7684631347656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 359.1734619140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 358.8626708984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 358.9625244140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 358.9911804199219
############ Running episode number: 734  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.814208984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 359.0007629394531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 358.85540771484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 359.0836181640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 358.637939453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 358.9789733886719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 359.08917236328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 359.0087585449219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 358.80865478515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 359.0931091308594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 358.843994140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 358.9835205078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 359.0517883300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.8987731933594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 358.88232421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 359.0366516113281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 358.95489501953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 358.72540283203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 358.97821044921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 358.8849182128906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 358.9913024902344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 358.9089050292969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 358.93902587890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 358.9326477050781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 358.7295227050781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 358.9489440917969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 358.8719787597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 358.9115295410156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 358.9059753417969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 359.0553894042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 358.8302001953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 358.8264465332031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 358.9458923339844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 358.9931945800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 358.8899230957031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 359.1368408203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 359.1031494140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.9937744140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 359.0628356933594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 359.0379943847656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 358.92041015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 358.97662353515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 359.01123046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 359.07440185546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 359.24810791015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 359.1243896484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 359.0616455078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 358.7242126464844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 358.9726257324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 358.97552490234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 358.88970947265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 358.9476318359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 358.9924621582031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 359.0689392089844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 358.916015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 358.85540771484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 358.87457275390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 358.9307556152344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 359.0160827636719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 358.89306640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 358.8735046386719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 358.9306335449219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 358.9632568359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 359.2066345214844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 359.0221862792969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 359.0760192871094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 358.9021911621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 358.91229248046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 359.00494384765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 359.1130065917969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 358.9102478027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 359.0510559082031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 358.8236999511719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 358.7251892089844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 358.78265380859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 359.13641357421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 358.6901550292969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 359.07684326171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.8741455078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 358.9255676269531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 358.79656982421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 359.0013122558594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 358.7982482910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 359.0350036621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 359.00762939453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 359.01739501953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 358.88250732421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 359.0141906738281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.7253723144531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 358.90771484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 358.9364013671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 358.9726257324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 358.97088623046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 358.9727783203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 358.9813537597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 359.03509521484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 358.94586181640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 359.11639404296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 358.73187255859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 358.9220886230469
############ Running episode number: 735  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.9315185546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 358.6972961425781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 359.05389404296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 358.7159118652344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 358.9360046386719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 358.8433837890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 359.2076416015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 359.07916259765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 358.8488464355469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 359.0520324707031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 359.0565490722656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 358.95562744140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 358.8589782714844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.8765869140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 359.1365966796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 358.86627197265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 358.7877502441406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 358.8620910644531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 358.7148132324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 358.9392395019531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 358.9913635253906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 359.1588439941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 359.0802307128906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 358.9822082519531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 358.97900390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 358.7861328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 358.89300537109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 359.06878662109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 359.0308532714844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 358.8946228027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 358.8733825683594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 359.11260986328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 358.78924560546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 358.9365539550781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 359.0208435058594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 358.99627685546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 358.99700927734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.8179016113281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 358.8007507324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 358.9077453613281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 358.8260803222656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 358.9565734863281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 358.9881286621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 359.1055603027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 358.92254638671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 358.8768615722656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 359.15338134765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 359.0617370605469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 359.008056640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 358.9291687011719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 358.93353271484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 358.9311218261719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 358.84246826171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 358.9855041503906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 359.0458984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 358.89105224609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 358.7817077636719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 358.90167236328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 359.0027160644531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 358.6688537597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 358.9877624511719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 359.1551818847656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 358.86566162109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 358.9410095214844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 358.7679748535156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 358.99798583984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 359.1263427734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 359.1566467285156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 359.0238952636719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 358.8642272949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 358.92364501953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 358.8006896972656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 358.9982604980469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 358.9625244140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 358.9194030761719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 358.8520812988281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 359.0065612792969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 358.9556579589844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.888427734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 359.1274719238281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 358.99560546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 359.0976257324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 358.8104553222656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 358.8870849609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 358.7802429199219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.91094970703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 359.0993957519531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.9957275390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.8116149902344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 358.80780029296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 358.89947509765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 358.85870361328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 358.8028869628906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 358.77886962890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 358.9178466796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 358.8003845214844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 358.8492736816406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 358.8686218261719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 358.9485778808594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 358.950927734375
############ Running episode number: 736  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.9653015136719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 359.0697326660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 359.1814880371094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 359.00885009765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 358.90850830078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 358.84942626953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 358.9711608886719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 358.9976806640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 358.8778076171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 358.7750549316406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 359.1153564453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 359.19818115234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 358.88128662109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.84912109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 359.0848083496094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 359.09832763671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 358.90472412109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 358.7565002441406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 358.9128723144531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 358.9664611816406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 358.9578552246094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 358.9253234863281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 359.08154296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 358.8428039550781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 359.0389404296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 358.989013671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 358.93804931640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 358.8309631347656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 358.7843017578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 358.9430847167969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 358.83905029296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 358.9686584472656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 358.8988952636719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 358.93450927734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 358.82623291015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 359.0152893066406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 358.9479064941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 359.18426513671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 358.9167785644531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 358.947509765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 358.8103942871094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 358.8404235839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 358.9388122558594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 358.8121643066406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 358.7634582519531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 358.8945007324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 358.87811279296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 359.0191650390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 358.87353515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 359.0028381347656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 359.03533935546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 358.85345458984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 359.1138610839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 359.1407165527344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 359.0486755371094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 358.5999450683594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 358.9124755859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 358.7337341308594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 358.9949951171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 359.070068359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 359.0450744628906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 359.194091796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 359.07763671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 358.8914489746094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 358.97686767578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 358.994873046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 358.96820068359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 358.79547119140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 358.8684387207031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 359.07452392578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 359.0660705566406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 358.9585266113281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 358.8441467285156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 358.977294921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 359.0704650878906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 358.8379211425781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 358.7986755371094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 358.98779296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.94677734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 358.7030334472656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 359.1464538574219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 358.7646789550781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 358.9455871582031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 358.9051513671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 358.7976989746094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.9615478515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 358.7781677246094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 359.0301208496094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.6610412597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 358.9905700683594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 359.1322937011719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 358.7702331542969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 358.9577331542969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 358.9109802246094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 359.0473937988281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 358.8979797363281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 358.7492980957031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 358.95947265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 359.09661865234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 358.76751708984375
############ Running episode number: 737  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 359.1363220214844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 358.73431396484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 358.9645690917969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 358.9527282714844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 359.0358581542969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 358.97564697265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 358.9711608886719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 358.96954345703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 358.9952697753906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 358.8741455078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 359.1083068847656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 358.9391174316406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 359.0886535644531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 359.10955810546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 358.9952087402344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 359.1384582519531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 358.86444091796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 358.9659729003906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 358.9295959472656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 359.14990234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 359.1271667480469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 359.15631103515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 358.8841552734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 359.0411071777344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 358.89862060546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 358.87518310546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 358.8085021972656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 359.10638427734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 359.0149841308594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 358.8996276855469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 358.8659973144531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 359.0653991699219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 358.8701171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 358.99066162109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 358.9735107421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 358.9573059082031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 359.07232666015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.9451599121094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 358.9372253417969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 358.7721252441406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 359.1006164550781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 359.0260009765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 358.9830017089844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 358.7208251953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 359.214599609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 358.93963623046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 358.6698913574219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 358.833984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 358.9314270019531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 358.8972473144531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 359.0708312988281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 359.02880859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 359.0911865234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 359.07421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 358.89154052734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 358.99761962890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 358.8145446777344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 359.01800537109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 358.74090576171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 358.9928283691406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 358.7209777832031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 358.9830322265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 359.11883544921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 359.01593017578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 358.7202453613281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 359.1340026855469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 358.9693298339844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 359.06280517578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 359.0662536621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 358.90069580078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 358.916748046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 358.8846130371094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 359.1024169921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 359.04266357421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 359.04656982421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 358.83355712890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 359.0143127441406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 358.9174499511719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 359.28192138671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 358.8396301269531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 358.9659118652344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 358.95526123046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 358.962158203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 358.929443359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 358.8662109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 359.0451354980469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 358.846923828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 359.0107116699219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 359.0145263671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 358.6907653808594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 359.0177307128906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 358.9495849609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 358.72589111328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 358.9247131347656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 359.09564208984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 359.0467224121094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 359.0430603027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 358.82879638671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 359.0190734863281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 358.8057861328125
############ Running episode number: 738  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.7491149902344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 358.6368103027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 358.8421936035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 358.88629150390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 358.906494140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 358.73834228515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 358.84515380859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 359.0465393066406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 359.046630859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 358.9324035644531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 358.7488098144531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 358.8160400390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 358.87579345703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.8932189941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 358.8861389160156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 358.96380615234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 358.8921203613281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 359.017578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 358.81658935546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 358.8211975097656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 359.23040771484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 358.7720031738281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 358.9441223144531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 358.91778564453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 359.05462646484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 358.953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 359.0774841308594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 358.931640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 358.74383544921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 359.0904541015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 359.04302978515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 358.8966064453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 359.01312255859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 359.0973205566406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 359.0232238769531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 358.9916687011719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 358.9622802734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.893310546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 358.76190185546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 358.9377746582031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 359.0882263183594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 358.8921203613281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 358.99725341796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 358.8846130371094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 359.09918212890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 358.8377990722656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 359.0434875488281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 358.94769287109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 359.0578308105469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 359.14263916015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 359.0166320800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 358.8442687988281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 359.0294189453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 358.94842529296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 359.1244201660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 359.0093688964844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 358.9642639160156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 358.8810729980469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 358.9490051269531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 358.9576416015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 358.695556640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 358.9559631347656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 359.0147399902344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 358.9417419433594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 359.0643615722656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 359.1546325683594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 358.93780517578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 358.9302673339844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 359.0003356933594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 359.03131103515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 359.00677490234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 358.902099609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 359.0517272949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 358.7552185058594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 358.878662109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 358.82086181640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 359.1087341308594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 358.8104248046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.89581298828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 359.22943115234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 359.0431213378906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 359.0179443359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 358.98040771484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 358.9343566894531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 358.83740234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.888671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 358.8572082519531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 359.1086120605469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 359.14520263671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 359.0647888183594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 359.0035400390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 358.7593688964844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 358.9794006347656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 359.0643310546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 358.76641845703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 358.86468505859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 359.1700439453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 358.9781188964844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 358.8597412109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 359.0841369628906
############ Running episode number: 739  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.92626953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 358.8714294433594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 358.9295349121094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 358.9132385253906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 358.64306640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 358.81988525390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 358.91314697265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 358.70849609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 359.2042236328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 358.9909362792969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 359.0340270996094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 358.9647216796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 359.1419372558594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.93817138671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 358.9770812988281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 359.1174011230469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 358.9419860839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 358.8302307128906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 358.6007385253906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 358.89971923828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 359.18292236328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 358.93719482421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 359.0453796386719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 359.04425048828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 358.85638427734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 359.0184631347656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 358.858642578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 358.9892883300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 358.94659423828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 359.02667236328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 358.7542419433594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 359.1322021484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 359.0689697265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 358.7415466308594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 359.0220947265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 358.8514709472656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 358.9771728515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 359.0096740722656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 359.2343444824219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 358.9225769042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 359.06317138671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 358.84112548828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 359.0625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 358.9255676269531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 358.9525146484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 358.9599914550781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 358.96795654296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 358.859619140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 358.9391174316406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 358.85089111328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 358.9710388183594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 359.3249206542969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 358.8672180175781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 358.95458984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 358.81781005859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 358.9024658203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 358.9599304199219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 358.9324951171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 358.78704833984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 359.1217346191406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 359.00347900390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 359.04791259765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 359.09722900390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 358.7591552734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 359.014404296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 359.0488586425781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 358.8179016113281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 358.9722595214844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 359.04827880859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 359.1383972167969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 358.9239807128906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 359.0146484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 358.80633544921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 358.86474609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 358.8793029785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 359.03765869140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 358.8856201171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 358.8556213378906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.9761047363281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 358.8140869140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 358.8739318847656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 358.94488525390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 359.02349853515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 358.9535217285156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 358.9560852050781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.95123291015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 358.9554138183594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 359.12261962890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 359.12451171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 359.0259094238281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 359.0660705566406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 359.0217590332031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 358.85302734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 358.9873046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 359.0276184082031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 358.8547668457031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 359.1274108886719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 358.9396667480469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 358.9518737792969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 358.9924011230469
############ Running episode number: 740  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.92230224609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 358.8978576660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 359.01251220703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 358.9549865722656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 359.09710693359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 358.8485107421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 358.95941162109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 358.8661804199219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 359.0279846191406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 358.8298034667969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 358.9125061035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 359.03582763671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 359.093505859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.8799743652344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 359.078369140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 358.85308837890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 358.8890686035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 359.0661926269531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 358.7266845703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 358.89752197265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 359.11212158203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 358.926513671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 359.10162353515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 358.8607482910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 359.1005554199219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 358.7867736816406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 358.9280700683594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 358.9478454589844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 358.90606689453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 358.9490051269531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 358.55352783203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 359.0115661621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 358.8828430175781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 358.8981018066406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 358.83123779296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 358.90771484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 358.90423583984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 359.1399230957031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 358.88128662109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 359.0291442871094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 358.97564697265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 358.8569030761719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 358.9599914550781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 358.8717346191406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 358.8313293457031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 358.8968811035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 359.1230163574219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 359.22344970703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 358.699951171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 358.86383056640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 359.1084289550781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 359.0946960449219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 359.1016845703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 359.1106262207031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 358.8965759277344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 358.93963623046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 358.9667663574219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 358.9398498535156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 358.96533203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 358.8390808105469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 358.9485168457031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 358.8241271972656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 359.05767822265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 359.0783996582031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 358.9810485839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 358.83526611328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 358.9249572753906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 358.9737243652344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 358.75006103515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 358.99481201171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 359.03448486328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 358.918212890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 358.9944152832031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 359.1539306640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 358.8321838378906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 358.9383850097656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 359.0400085449219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 358.6401672363281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 359.0815734863281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 358.7721862792969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 358.9584655761719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 358.9212646484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 358.9294738769531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 358.618896484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 359.02056884765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.9347839355469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 358.822509765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 359.0703430175781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.98529052734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 359.0412902832031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 358.9328918457031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 358.9290466308594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 358.8402404785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 359.0214538574219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 358.87518310546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 359.0491943359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 358.7886657714844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 358.9127197265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 358.91082763671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 358.7067565917969
############ Running episode number: 741  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.9951477050781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 358.8975830078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 359.1612854003906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 358.91436767578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 359.1346435546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 358.9264831542969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 359.04364013671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 358.9355163574219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 359.0485534667969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 358.8956298828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 358.9696044921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 358.9914245605469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 358.97760009765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.98712158203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 358.8603820800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 359.05694580078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 359.1025390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 358.9419250488281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 358.8458251953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 358.9284362792969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 358.78350830078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 359.0135803222656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 358.7813720703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 358.92498779296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 359.16119384765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 358.9046936035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 358.90167236328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 359.0760498046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 358.80218505859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 358.9231872558594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 359.03399658203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 359.0735778808594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 358.9997863769531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 359.0379943847656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 358.8242492675781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 358.8025817871094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 358.9036865234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 359.16455078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 359.0008544921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 358.7666320800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 358.9922790527344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 358.8172912597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 359.199951171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 358.8844909667969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 359.0069885253906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 359.1451721191406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 358.84423828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 359.0349426269531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 358.94635009765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 358.8912353515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 358.8961486816406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 359.04620361328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 358.8652038574219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 358.95916748046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 359.1043395996094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 358.96539306640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 358.8594055175781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 359.04718017578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 358.86700439453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 358.8522033691406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 359.0686950683594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 358.95782470703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 359.0224304199219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 358.8026123046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 358.9531555175781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 359.04730224609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 358.7381286621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 359.0978088378906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 358.9682922363281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 359.182373046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 358.9493103027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 358.79510498046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 359.13104248046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 358.9153747558594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 358.888916015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 358.671142578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 359.2633361816406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 358.8692321777344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.90643310546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 359.0375061035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 358.8746337890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 358.9006652832031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 358.9957580566406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 359.15936279296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 358.8277282714844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.8787841796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 358.9886779785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 359.2716979980469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 359.0934753417969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 358.8909912109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 359.03955078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 359.0586242675781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 359.1538391113281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 359.10809326171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 359.1380615234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 359.0746765136719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 358.87890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 359.2621765136719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 358.7083435058594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 358.9308166503906
############ Running episode number: 742  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 359.1258850097656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 358.8990173339844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 359.1606750488281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 358.84014892578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 359.09228515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 359.0640869140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 358.6146545410156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 358.8953552246094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 358.9075622558594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 358.99090576171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 359.03582763671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 358.91607666015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 359.0018005371094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.7276611328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 358.8689270019531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 359.1072692871094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 358.8783874511719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 358.680419921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 358.9146423339844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 359.1011047363281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 359.03826904296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 358.9652099609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 358.98291015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 358.9621276855469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 358.910888671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 359.0104675292969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 359.0696716308594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 358.7333679199219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 359.022705078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 359.14654541015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 358.9864807128906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 358.8708190917969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 358.867431640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 358.6840515136719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 358.9233093261719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 359.0726318359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 359.185302734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 359.1376037597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 358.8533020019531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 358.97381591796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 359.1797790527344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 358.9530944824219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 359.13818359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 358.8570861816406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 359.02532958984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 358.7536315917969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 358.97857666015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 358.72271728515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 358.89752197265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 358.8441162109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 358.9624938964844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 359.0340576171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 358.841064453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 358.9265441894531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 359.1293029785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 359.1513366699219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 358.9302978515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 359.12725830078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 358.9654846191406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 358.80560302734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 358.9583740234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 358.9376220703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 358.75048828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 359.0158386230469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 358.9355163574219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 359.0833740234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 358.90435791015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 359.0260314941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 358.9914245605469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 358.9794006347656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 359.27001953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 358.9512939453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 358.97998046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 358.74609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 358.9891662597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 358.9214782714844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 358.907958984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 358.9088134765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 359.08197021484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 358.79498291015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 358.92041015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 359.0064392089844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 358.6703186035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 358.9539794921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 358.8771667480469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 359.00341796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 359.04010009765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.9259338378906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 359.0652160644531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 358.81573486328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 358.8285827636719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 358.9870300292969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 359.0141906738281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 359.1108093261719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 359.1369934082031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 358.94000244140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 358.855712890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 359.1346435546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 358.9327087402344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 358.95379638671875
############ Running episode number: 743  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.9879150390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 358.7818298339844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 358.95745849609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 358.92022705078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 359.0916442871094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 359.10882568359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 359.0976867675781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 359.03985595703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 359.05059814453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 358.8572998046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 358.9158935546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 358.90655517578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 358.83502197265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.7703857421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 359.0711364746094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 359.0290832519531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 358.956298828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 359.0645751953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 358.9270935058594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 359.05633544921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 358.9929504394531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 358.84747314453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 359.12213134765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 359.0024719238281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 358.82452392578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 358.8255310058594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 359.02392578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 358.94512939453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 359.0775451660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 358.9403381347656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 358.9603271484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 358.9700622558594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 358.8243103027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 358.9754638671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 359.0502624511719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 359.14251708984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 359.0778503417969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.8481140136719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 359.0928649902344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 358.80548095703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 358.8705139160156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 358.91998291015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 358.8270568847656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 358.9358215332031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 358.8243713378906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 358.7228698730469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 359.087646484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 358.8416442871094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 358.8929748535156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 359.04248046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 358.9229431152344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 359.0039367675781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 359.0929870605469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 359.0478210449219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 359.0901794433594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 358.9952392578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 358.6844787597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 359.1783142089844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 359.0127258300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 359.04681396484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 359.2760009765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 358.93634033203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 358.8941650390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 358.79736328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 358.927001953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 358.9848937988281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 358.9999694824219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 359.0180969238281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 358.81024169921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 358.71142578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 359.0769348144531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 358.94146728515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 359.0286560058594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 359.11834716796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 359.03302001953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 358.75555419921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 358.7899169921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 359.1563720703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.9591979980469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 358.8820495605469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 358.97906494140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 359.0931091308594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 358.8665771484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 358.9435119628906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 359.1065979003906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 359.1111755371094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 359.1181335449219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.8708190917969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 359.1070556640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 358.8454284667969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 358.87506103515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 358.921142578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 358.8104553222656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 359.01605224609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 359.0257873535156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 358.82330322265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 359.0705261230469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 358.9687194824219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 358.8261413574219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 358.9501953125
############ Running episode number: 744  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.8340759277344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 358.9642639160156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 358.9427795410156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 359.0712890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 358.88720703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 359.1968078613281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 358.8406066894531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 359.1802062988281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 358.96136474609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 359.0909118652344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 358.9765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 359.07196044921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 358.9449768066406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.63775634765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 359.011962890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 359.1251525878906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 359.03826904296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 359.08978271484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 359.13714599609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 358.7149658203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 359.11358642578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 358.8965759277344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 359.0765380859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 358.9619445800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 358.8252258300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 358.8360595703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 358.8098449707031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 358.81744384765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 359.13568115234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 359.1924133300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 359.05169677734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 358.74603271484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 358.6420593261719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 358.8758850097656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 359.0050048828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 359.13739013671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 358.92279052734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.7984924316406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 359.0232238769531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 359.1993408203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 359.0384216308594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 358.93865966796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 358.8229675292969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 359.0364990234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 359.12225341796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 359.0609130859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 358.9673156738281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 359.2520751953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 358.8575134277344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 358.9024658203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 359.093994140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 358.95068359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 359.0823974609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 358.6990051269531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 358.98541259765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 358.93719482421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 358.98492431640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 358.7735900878906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 359.26239013671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 358.984619140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 358.8287048339844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 358.9588317871094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 359.0567321777344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 358.8461608886719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 359.1450500488281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 358.91473388671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 358.98724365234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 358.94580078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 358.8895568847656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 359.0713195800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 358.939453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 358.9481201171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 358.8970947265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 358.6135559082031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 358.9657287597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 359.0249938964844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 359.0223083496094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 358.97540283203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.7810363769531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 358.9175109863281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 358.7072448730469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 359.14666748046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 358.7676086425781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 358.76385498046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 358.94891357421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 359.2093200683594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 359.0115051269531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 359.06109619140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.99298095703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 359.0701599121094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 358.8447570800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 359.048583984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 358.947998046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 358.8879699707031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 358.865234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 358.92388916015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 359.025390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 359.08831787109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 359.0875244140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 358.8149108886719
############ Running episode number: 745  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.87664794921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 358.9505920410156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 359.02105712890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 358.6800842285156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 359.0077819824219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 359.08355712890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 358.9736633300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 358.9083251953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 359.0545959472656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 359.0211486816406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 358.75872802734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 358.9100341796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 358.9666748046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 359.03289794921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 359.02862548828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 359.1858825683594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 358.91455078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 358.8621520996094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 358.93011474609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 358.9450988769531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 358.7796630859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 359.20855712890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 358.96551513671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 358.8919982910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 358.9145812988281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 358.9043273925781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 358.9530944824219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 359.00836181640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 358.8949890136719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 359.03778076171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 359.1401672363281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 358.8431701660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 358.9521789550781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 358.9354248046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 358.8040466308594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 359.15069580078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 359.01104736328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 359.0135803222656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 359.0529479980469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 358.8808288574219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 358.7975769042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 358.9303894042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 359.07318115234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 359.11297607421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 358.8267517089844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 358.69879150390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 358.8513488769531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 358.8450622558594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 358.96954345703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 358.8765563964844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 358.7469787597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 359.1053466796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 359.0367431640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 358.835693359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 358.9082946777344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 358.8832092285156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 358.60406494140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 358.81427001953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 358.9384460449219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 358.9325866699219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 358.8609313964844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 358.8868408203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 359.03387451171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 359.067626953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 358.9925842285156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 358.88323974609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 359.0162658691406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 358.9952087402344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 359.00372314453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 358.84710693359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 358.91192626953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 359.0006103515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 358.91998291015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 358.95330810546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 359.177490234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 358.85986328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 358.7900390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 358.76190185546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 359.1043395996094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 359.0272521972656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 358.7674560546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 359.0858154296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 358.823974609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 358.99420166015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 358.9774169921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 359.13775634765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 358.8254699707031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 359.0477294921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.9847106933594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 358.7928161621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 359.2223815917969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 359.11077880859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 359.1614685058594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 359.1658935546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 358.9792785644531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 359.13385009765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 358.81024169921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 358.9118347167969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 358.8968200683594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 358.982421875
############ Running episode number: 746  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.93450927734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 358.9399108886719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 358.92462158203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 358.7960205078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 358.9250183105469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 359.0469665527344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 358.6407470703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 359.0285949707031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 359.1464538574219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 359.10736083984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 358.8623046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 358.91778564453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 358.9696960449219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.8937072753906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 358.9955749511719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 359.01361083984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 359.1141357421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 359.0385437011719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 359.26025390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 359.0381164550781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 358.8976745605469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 358.9057312011719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 358.6791076660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 358.9295654296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 358.7855529785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 358.669921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 358.7615966796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 358.8334655761719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 358.9525451660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 358.986083984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 358.795654296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 358.8185119628906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 358.8620910644531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 358.8351745605469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 358.9128723144531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 359.0745544433594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 358.8226013183594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.7772216796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 359.0702209472656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 359.1051330566406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 358.9548034667969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 358.89837646484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 359.2573547363281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 359.051513671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 358.9350891113281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 359.08837890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 358.89349365234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 359.06878662109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 358.8234558105469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 358.9569091796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 359.15771484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 358.8736267089844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 358.53546142578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 358.9645690917969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 359.0758361816406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 359.0226135253906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 359.129150390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 358.991455078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 359.0044250488281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 359.07110595703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 358.958984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 358.9731750488281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 359.1116638183594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 359.03485107421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 358.9153137207031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 358.9917297363281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 359.0335388183594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 359.2237243652344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 358.99420166015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 358.8642272949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 358.8511657714844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 358.70611572265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 358.7424011230469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 359.023193359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 359.05322265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 358.8815002441406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 359.01666259765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 358.8403625488281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.9797668457031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 358.9563293457031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 358.8638610839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 358.83416748046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 359.07904052734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 359.07952880859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 358.756591796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.88726806640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 358.82159423828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.8849792480469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 359.0816955566406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 359.22625732421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 358.9429931640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 358.913818359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 358.9917907714844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 358.85150146484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 358.98260498046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 359.0597229003906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 358.79718017578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 358.7679748535156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 359.03070068359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 358.9036865234375
############ Running episode number: 747  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.8252868652344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 358.769287109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 358.7506408691406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 358.98602294921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 358.80877685546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 359.0712890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 359.06927490234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 358.806396484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 358.9403076171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 358.9103088378906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 358.99609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 359.0406188964844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 358.9921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.8222351074219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 358.94097900390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 359.13275146484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 359.0807189941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 358.9115905761719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 358.7274169921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 358.8523254394531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 358.8200988769531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 359.04241943359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 359.0937194824219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 359.030517578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 358.8789978027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 358.90447998046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 358.8043212890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 358.98748779296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 359.22308349609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 358.98199462890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 358.9483337402344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 359.0032043457031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 358.78704833984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 358.9396057128906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 359.1139831542969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 359.201904296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 358.87554931640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.9750671386719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 358.8105163574219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 359.1700439453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 358.7763671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 359.1429748535156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 358.8277282714844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 359.0111999511719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 358.8745422363281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 359.0419921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 359.0665588378906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 358.9022216796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 358.99853515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 358.7010498046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 358.9430847167969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 358.81072998046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 359.0766296386719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 359.16729736328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 358.978759765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 358.9297180175781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 358.8367004394531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 359.04107666015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 358.8065185546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 359.11181640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 358.97540283203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 358.905517578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 359.024169921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 358.9809875488281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 358.92449951171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 358.7582702636719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 358.7969055175781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 359.0944519042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 359.04766845703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 359.0142822265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 358.9637145996094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 359.1173400878906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 358.9723205566406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 358.9861755371094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 358.9007873535156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 358.8455810546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 358.96875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 358.7060546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.93231201171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 359.0572509765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 359.0887145996094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 358.9534606933594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 359.0631103515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 359.0534973144531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 359.0240783691406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 359.05377197265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 359.0467529296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.9228515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.9393310546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 358.98126220703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 358.9039306640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 358.8154602050781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 358.9416198730469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 358.9034423828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 359.1353759765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 359.1618347167969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 359.01214599609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 359.1026306152344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 359.14935302734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 358.98724365234375
############ Running episode number: 748  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.91790771484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 358.9957580566406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 358.9154052734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 359.0062561035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 358.8979797363281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 359.0997619628906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 359.08184814453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 358.8067321777344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 359.0906677246094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 359.0394592285156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 359.1759948730469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 359.2528381347656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 358.8773193359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 359.08990478515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 359.10455322265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 358.9890441894531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 359.1837158203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 359.2188415527344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 358.9111633300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 359.06182861328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 358.9522705078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 359.02423095703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 359.0844421386719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 358.92498779296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 358.7498474121094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 358.9462585449219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 358.95819091796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 358.7798156738281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 358.8039245605469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 359.0246276855469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 358.8577575683594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 359.00787353515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 358.8628845214844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 359.1131896972656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 358.918212890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 358.9360656738281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 358.8533020019531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.9650573730469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 358.8387756347656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 359.1107482910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 358.8165588378906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 358.7358093261719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 359.00836181640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 358.83477783203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 359.1220703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 358.7879638671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 358.8548583984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 358.8925476074219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 359.08056640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 358.82135009765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 358.8037109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 359.0084228515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 359.1071472167969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 358.77459716796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 359.08770751953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 359.13128662109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 359.1288757324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 358.91363525390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 359.0031433105469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 358.69708251953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 359.0549621582031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 358.8924865722656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 358.84246826171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 358.83636474609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 359.15625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 359.0320129394531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 359.10302734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 359.0724182128906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 358.7289123535156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 358.75787353515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 358.842529296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 358.9300537109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 358.7265319824219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 358.8607177734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 358.9956359863281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 358.9435119628906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 359.0963134765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 358.8143005371094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.86651611328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 359.0976867675781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 358.9533386230469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 359.02337646484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 358.85760498046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 359.01947021484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 358.7599792480469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.8985595703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 358.98077392578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 359.2236633300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.919189453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 359.0071105957031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 358.93145751953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 359.0438537597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 358.9907531738281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 359.0136413574219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 358.89410400390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 358.9801330566406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 358.99786376953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 358.9847717285156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 358.86669921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 358.87017822265625
############ Running episode number: 749  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.8634033203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 359.2457580566406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 358.998291015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 358.8760986328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 358.8950500488281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 358.9461669921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 358.7488098144531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 358.7054138183594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 358.95111083984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 358.9220886230469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 359.0779113769531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 358.8248596191406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 358.9027404785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.91558837890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 358.9111022949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 359.0716857910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 359.13531494140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 358.9957580566406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 358.929443359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 359.1616516113281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 359.004638671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 358.9585876464844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 358.9864501953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 358.9205627441406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 358.8121643066406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 359.0047302246094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 358.7527770996094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 358.85137939453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 359.00836181640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 358.8908996582031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 359.1411437988281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 358.6651611328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 358.9769592285156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 358.84814453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 358.92474365234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 358.98602294921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 359.0896911621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 359.05731201171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 358.9497375488281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 358.7474060058594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 359.0208740234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 359.03045654296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 358.8749694824219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 359.02813720703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 358.76104736328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 359.05914306640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 358.9522705078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 359.1122131347656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 358.8939208984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 359.1974182128906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 358.8170166015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 359.0649719238281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 359.1335754394531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 359.14776611328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 359.0193786621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 359.0700378417969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 358.9886474609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 359.0649108886719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 358.88525390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 358.9969177246094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 358.91949462890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 359.07489013671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 358.89422607421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 358.85809326171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 358.7384033203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 359.0339660644531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 358.7633361816406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 359.0048828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 359.0661315917969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 358.8773498535156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 358.93157958984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 359.1189270019531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 358.9574890136719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 358.96380615234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 359.0496826171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 359.12908935546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 358.8376770019531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 358.82135009765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 359.0344543457031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 358.8429870605469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 358.79766845703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 359.07244873046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 359.055419921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 358.83746337890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 359.0901794433594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.9610290527344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 359.0223693847656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 359.0220947265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 359.1629943847656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 358.9146728515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 358.9289855957031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 358.87591552734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 358.79071044921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 358.9640808105469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 358.7807922363281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 358.76495361328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 359.10394287109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 358.9854431152344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 358.8516540527344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 359.0086975097656
############ Running episode number: 750  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 359.1627197265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 358.6582946777344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 358.8261413574219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 359.0262451171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 358.9364318847656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 359.14306640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 359.0074462890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 358.81591796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 358.8735046386719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 358.975830078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 358.67230224609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 358.7836608886719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 358.8616027832031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.94061279296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 358.9644470214844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 359.0645446777344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 359.12298583984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 358.99652099609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 358.6553649902344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 359.0921936035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 359.1646423339844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 359.0658874511719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 358.94610595703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 358.8915100097656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 359.00244140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 358.6598205566406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 359.15972900390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 359.09576416015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 359.00189208984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 359.0426940917969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 359.1375427246094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 359.0303649902344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 359.0993957519531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 358.81451416015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 358.8849182128906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 358.9203796386719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 358.97314453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.7760925292969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 359.0613708496094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 359.10791015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 359.0726013183594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 358.96771240234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 359.11627197265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 359.0047302246094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 358.8472595214844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 359.1063537597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 358.8774719238281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 358.96258544921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 359.1874694824219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 358.9556884765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 358.8555908203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 358.8695373535156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 358.889404296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 358.8063659667969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 358.946533203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 358.8944091796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 359.01702880859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 359.08428955078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 358.93017578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 358.8094787597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 358.8023681640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 358.9540710449219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 358.90924072265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 358.87701416015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 358.948486328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 358.8913269042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 359.34234619140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 358.9288635253906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 359.00396728515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 358.8525085449219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 358.8904113769531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 359.1319885253906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 358.9906921386719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 359.0892028808594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 358.9100341796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 358.9663391113281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 358.8860168457031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 359.0065612792969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 359.0567932128906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 358.9724426269531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 359.1894226074219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 358.9348449707031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 358.7777099609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 359.0263671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 358.78045654296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 359.1209716796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 358.7156677246094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.8903503417969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.90130615234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 359.0760803222656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 358.8777770996094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 358.85040283203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 358.9325256347656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 359.1712951660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 358.9688720703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 359.0752258300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 358.8650207519531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 359.04095458984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 359.08599853515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 358.9990234375
############ Running episode number: 751  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 359.06231689453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 359.04180908203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 359.0066223144531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 358.979736328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 358.9491271972656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 359.1034851074219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 359.062744140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 358.7324523925781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 359.0121765136719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 358.88134765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 358.9275817871094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 358.9184265136719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 358.8045654296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.7687072753906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 358.83245849609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 358.9001159667969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 358.9609069824219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 358.7273864746094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 359.0318603515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 358.92742919921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 358.9054870605469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 359.1227111816406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 358.94140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 358.9895935058594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 358.94622802734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 359.0257263183594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 358.9545593261719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 358.8143615722656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 358.9439697265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 358.90740966796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 358.96051025390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 359.0491027832031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 359.03271484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 359.107666015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 359.2366027832031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 359.03863525390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 359.0254211425781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.89892578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 358.87420654296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 358.96710205078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 359.0799255371094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 359.1504211425781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 358.94989013671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 358.94171142578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 359.0265808105469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 359.0074157714844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 359.056884765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 359.08544921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 358.88177490234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 358.8327331542969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 358.68853759765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 358.7787170410156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 358.8871765136719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 358.9818420410156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 358.8928527832031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 358.9343566894531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 358.9217529296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 358.91497802734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 358.95025634765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 358.70111083984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 358.9094543457031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 359.02587890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 358.7359924316406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 358.95501708984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 359.27056884765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 359.1153259277344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 358.8498840332031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 358.9582824707031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 359.1184387207031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 358.93505859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 359.1658935546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 358.8670654296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 359.00872802734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 359.0641784667969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 359.107177734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 359.03955078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 359.1820983886719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 358.92974853515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 359.1101989746094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 358.94586181640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 358.797119140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 358.9434814453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 358.9138488769531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 358.8367614746094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 359.18896484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.7725524902344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 358.8549499511719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.934814453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.9554748535156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 359.0194091796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 359.03546142578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 358.9212646484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 358.9111022949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 358.9432373046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 359.06494140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 359.005615234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 358.8341064453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 359.18255615234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 358.9697265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 358.8594665527344
############ Running episode number: 752  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 359.0718688964844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 358.9822082519531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 359.00933837890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 359.044921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 358.96881103515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 359.0308837890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 358.9808349609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 359.1066589355469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 358.798583984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 359.1466064453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 358.7652893066406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 359.02667236328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 359.3092041015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.873046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 358.845703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 358.9339904785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 358.9490966796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 358.8504333496094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 359.1303405761719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 359.1060485839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 358.8990173339844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 359.008056640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 358.82568359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 358.99029541015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 358.9346618652344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 358.8779296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 358.9393310546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 358.840087890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 359.0084228515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 359.0671081542969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 358.63629150390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 358.9850158691406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 358.93572998046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 359.0759582519531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 358.8285217285156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 358.98577880859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 359.0024108886719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 359.0026550292969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 359.11260986328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 358.72137451171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 359.00408935546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 358.8165283203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 358.79962158203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 359.0858459472656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 359.0274353027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 359.0697326660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 358.7315673828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 359.04296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 358.8739318847656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 359.0305480957031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 358.71002197265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 358.9583435058594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 358.70794677734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 358.86968994140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 359.1212158203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 358.9505310058594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 358.86492919921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 358.89434814453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 358.97509765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 358.8721008300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 358.8984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 358.9704895019531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 358.94879150390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 358.78912353515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 358.9173278808594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 358.9321594238281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 358.9938659667969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 358.9471130371094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 358.8942565917969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 358.99664306640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 358.9283142089844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 359.04180908203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 358.9211120605469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 359.0234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 358.8616943359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 358.83770751953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 359.0726013183594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 358.8895568847656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 359.0980529785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 359.0097351074219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 358.9648742675781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 359.1399841308594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 358.83734130859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 359.0012512207031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 358.9750061035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.9925842285156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 358.91876220703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.9185485839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.8898010253906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 359.0086364746094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 358.9664306640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 359.09466552734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 358.73541259765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 358.7738037109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 359.3656311035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 358.9529724121094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 359.1755065917969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 359.1544494628906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 359.1376953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 359.0415954589844
############ Running episode number: 753  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.7261962890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 359.0196228027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 358.80377197265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 358.88787841796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 358.9219665527344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 358.96136474609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 358.90478515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 359.16790771484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 359.1087341308594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 358.9669494628906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 358.8516540527344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 358.9853820800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 359.0248718261719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.9670104980469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 359.0639343261719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 358.9490661621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 359.0470886230469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 358.9837341308594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 359.05389404296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 359.1566162109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 358.96966552734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 358.8538818359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 358.9523010253906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 359.1869812011719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 358.92724609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 358.71954345703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 358.81329345703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 358.8712158203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 359.1103820800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 358.90631103515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 358.8858642578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 358.78961181640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 359.0215759277344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 358.9039001464844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 358.97320556640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 359.10345458984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 358.8650817871094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.71368408203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 358.73638916015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 359.1988220214844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 358.9436950683594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 358.9944763183594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 359.079833984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 358.849609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 358.8841552734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 359.0221862792969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 358.9334716796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 359.163330078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 358.9933776855469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 358.9659729003906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 358.9587097167969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 359.251220703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 359.066162109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 358.9835205078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 358.82293701171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 358.9732971191406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 358.7552490234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 359.01019287109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 358.9425048828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 359.0451354980469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 359.0296936035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 358.84466552734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 358.97479248046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 359.0205383300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 358.6996154785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 358.8109436035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 358.88519287109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 358.9919738769531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 359.1104736328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 358.9974060058594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 358.91595458984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 358.9689636230469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 358.75054931640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 358.8294677734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 359.0451354980469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 359.03167724609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 358.97705078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 358.88311767578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.84625244140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 358.8768005371094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 359.08349609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 359.03802490234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 359.1275329589844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 358.9941711425781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 358.9999694824219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 359.0758056640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 359.13818359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.8912048339844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.9755554199219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 359.0713806152344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 359.2894287109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 358.9449462890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 359.0818786621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 358.84912109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 359.0445861816406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 359.0067138671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 359.0451965332031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 359.06134033203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 358.89996337890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 358.93365478515625
############ Running episode number: 754  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.9907531738281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 358.981201171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 359.0321960449219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 358.9706115722656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 358.7113037109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 359.0011291503906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 358.9439697265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 358.84454345703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 359.0237731933594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 358.777587890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 358.7344665527344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 358.8788757324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 358.846923828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.98828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 359.1539001464844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 358.9543151855469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 358.9315185546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 359.0368957519531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 358.8868103027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 358.99139404296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 359.10418701171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 358.8301696777344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 358.8045349121094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 358.9769592285156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 358.7059020996094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 358.9421691894531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 358.9423828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 358.9176940917969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 358.9172058105469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 359.1915588378906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 358.8471984863281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 358.86883544921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 358.94622802734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 359.10052490234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 358.7658996582031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 358.9006652832031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 359.1276550292969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.9240417480469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 359.13818359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 359.0257263183594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 358.9846496582031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 358.90142822265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 358.6895446777344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 359.0697937011719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 359.03399658203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 358.6272277832031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 359.10589599609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 358.9991149902344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 358.9690246582031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 359.1290588378906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 358.9591369628906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 358.9893493652344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 358.93896484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 359.07012939453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 358.9609069824219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 359.0396728515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 358.96282958984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 358.7427062988281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 359.0606689453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 358.9515686035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 358.96795654296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 358.9945373535156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 358.7434997558594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 358.9986267089844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 358.9286193847656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 358.80352783203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 358.9480895996094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 359.05816650390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 358.90008544921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 358.78265380859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 358.9667663574219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 359.21990966796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 358.9141540527344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 358.8360595703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 359.1520690917969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 359.1106262207031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 358.9386901855469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 359.0729675292969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 359.06341552734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 359.0997619628906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 358.95458984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 359.0062255859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 358.8866882324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 359.0658874511719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 359.2125244140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.9127197265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 358.7957458496094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.8652038574219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.9528503417969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 359.1193542480469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 358.8016662597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 358.91998291015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 358.9549865722656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 358.957275390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 359.01812744140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 358.9134826660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 358.96795654296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 358.8261413574219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 359.0030517578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 358.89642333984375
############ Running episode number: 755  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.7798767089844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 359.1506652832031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 359.04931640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 359.1242370605469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 358.8482666015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 358.9991760253906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 358.9325866699219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 358.85968017578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 358.97222900390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 358.74444580078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 359.0809020996094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 358.8591613769531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 359.0061340332031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.898193359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 358.9759521484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 358.9642028808594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 358.8522644042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 359.1033935546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 359.1451721191406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 358.8664855957031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 358.9820861816406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 359.2569274902344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 358.8483581542969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 359.1474914550781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 358.8609619140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 359.0959167480469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 359.1544189453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 359.0218811035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 358.9488525390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 358.9323425292969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 358.8790283203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 358.7352600097656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 358.92724609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 358.6948547363281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 358.84222412109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 359.0234069824219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 359.0851135253906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.92755126953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 359.0649108886719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 358.8519592285156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 359.0039978027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 358.9385986328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 358.9311218261719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 359.0333557128906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 358.6197204589844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 358.9824523925781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 359.16094970703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 358.7967529296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 358.9557800292969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 358.94305419921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 358.9422302246094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 358.82989501953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 359.20648193359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 359.2200927734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 358.91729736328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 358.9504699707031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 359.0798034667969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 358.89385986328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 359.02630615234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 358.8713684082031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 359.0178527832031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 358.90765380859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 359.0794982910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 358.8791809082031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 358.9225158691406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 358.842041015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 359.044921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 358.8941650390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 359.0127258300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 358.9886169433594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 359.0384521484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 359.018798828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 358.8266906738281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 358.9830017089844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 358.85028076171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 358.84625244140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 358.92694091796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 359.0018310546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.944091796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 359.2293701171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 358.9999694824219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 358.5553894042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 358.9518737792969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 359.1052551269531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 359.1152648925781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 359.06878662109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 358.906494140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.9328308105469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 359.02191162109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 358.756591796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 358.82708740234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 358.9194641113281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 359.0427551269531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 359.01739501953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 358.7825012207031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 358.9749755859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 358.6594543457031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 358.6999816894531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 359.0545349121094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 359.0602111816406
############ Running episode number: 756  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.79876708984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 359.003662109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 358.8717346191406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 358.984619140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 358.99200439453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 359.07122802734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 358.9525451660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 358.8358154296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 358.85479736328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 358.88360595703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 359.01031494140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 358.927734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 358.9836730957031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.7777404785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 358.8466796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 358.85772705078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 358.8272399902344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 358.8329162597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 359.0214538574219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 358.9855041503906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 358.95574951171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 359.1236572265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 358.917724609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 358.92333984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 359.0005187988281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 359.010986328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 358.91729736328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 359.0372619628906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 359.06451416015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 358.9680480957031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 358.70489501953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 358.9827575683594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 359.0719299316406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 358.92803955078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 358.8414611816406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 359.0743408203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 359.13299560546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.9053649902344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 358.92059326171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 359.0396728515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 358.9732666015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 359.0693664550781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 359.1046447753906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 358.9324951171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 358.8997497558594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 359.0630798339844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 358.9945068359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 359.0152282714844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 359.1052551269531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 358.9596252441406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 358.8597106933594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 358.9098815917969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 358.9925231933594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 358.7001953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 358.8441162109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 358.8654479980469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 358.9305419921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 358.8368225097656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 358.9108581542969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 359.0100402832031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 358.9486999511719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 359.11004638671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 359.0331115722656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 358.9446716308594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 358.8681945800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 358.96087646484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 359.0981750488281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 358.9244689941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 359.0423889160156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 358.8990783691406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 359.07086181640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 359.0052490234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 358.9595642089844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 358.814208984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 358.89068603515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 359.0794372558594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 358.941162109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 359.0332336425781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.9481201171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 359.154541015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 359.12188720703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 358.8913269042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 358.8320007324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 359.0284729003906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 358.8490905761719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.76971435546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 358.7744140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 359.0583801269531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.9361877441406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 358.8168029785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 358.90045166015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 358.8382568359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 358.7782287597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 359.0083923339844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 358.92340087890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 358.91876220703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 358.9441833496094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 359.12408447265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 358.9647216796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 358.9786682128906
############ Running episode number: 757  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 359.0589599609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 359.1915283203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 358.9758605957031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 358.759033203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 358.980712890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 359.0653381347656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 358.8362731933594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 358.8374938964844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 358.90753173828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 358.8874206542969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 358.7678527832031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 359.0049743652344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 358.7878723144531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.95184326171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 359.1282653808594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 359.16375732421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 358.9154052734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 358.9824523925781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 358.6470947265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 358.8814392089844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 359.0586853027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 358.92926025390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 358.87408447265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 359.2542724609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 359.0975646972656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 359.13739013671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 359.1212463378906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 359.01751708984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 358.9037170410156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 359.0408935546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 358.97760009765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 358.9246520996094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 359.0181884765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 358.68267822265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 358.922607421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 358.869384765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 359.08551025390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.9570617675781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 358.8542175292969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 359.0792236328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 359.16131591796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 358.85455322265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 359.10833740234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 359.2106628417969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 359.0804138183594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 358.9516296386719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 359.145263671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 359.02142333984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 358.8299865722656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 358.97882080078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 358.71234130859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 359.08526611328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 358.8663330078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 359.1475830078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 358.7337951660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 358.82769775390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 359.0195007324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 358.8554382324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 358.8779296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 359.0592956542969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 359.0088195800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 358.845703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 358.9413757324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 358.95770263671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 358.7869567871094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 359.0245666503906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 358.97564697265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 359.0122985839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 358.89288330078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 358.8656921386719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 358.8735656738281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 358.9884033203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 358.95599365234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 359.1061096191406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 358.941162109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 359.00177001953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 358.96685791015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 358.8843994140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.8490905761719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 359.1241149902344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 359.04156494140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 358.879638671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 359.0663757324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 359.06024169921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 358.9119567871094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.9048156738281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 358.9429016113281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 359.1817626953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.7451171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 358.7803649902344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 358.9080505371094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 358.60235595703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 358.96392822265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 358.98382568359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 359.0440368652344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 359.1523742675781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 359.0989685058594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 358.9611511230469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 359.0044860839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 358.9538879394531
############ Running episode number: 758  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.89471435546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 359.0726623535156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 358.90045166015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 358.86474609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 359.0044860839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 358.8087463378906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 359.17132568359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 358.6285400390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 358.84930419921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 358.8103332519531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 358.9762878417969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 358.8907165527344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 359.1103515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 359.1146240234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 359.07135009765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 358.94012451171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 358.7103271484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 358.9853210449219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 359.0246276855469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 359.0563049316406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 358.9749450683594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 358.9963073730469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 359.1182861328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 358.8314514160156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 358.9445495605469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 358.73919677734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 358.9710998535156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 358.9925537109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 359.171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 358.7326965332031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 358.8830261230469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 358.6918640136719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 359.0320739746094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 358.9362487792969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 358.92303466796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 359.0126953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 359.0537109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.8616943359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 358.9822692871094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 358.80108642578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 358.67510986328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 358.7239990234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 359.1222839355469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 359.0396728515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 358.8516540527344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 358.9956359863281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 359.0407409667969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 359.0640563964844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 359.2607727050781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 358.9847412109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 358.8948059082031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 359.02783203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 358.9803466796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 359.1498107910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 358.5483093261719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 358.8724060058594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 358.9391784667969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 359.0484924316406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 358.9646911621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 358.89300537109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 358.9955749511719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 358.6486511230469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 358.851806640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 358.864990234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 358.95697021484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 359.0948791503906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 358.7451171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 359.07281494140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 359.0838317871094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 359.0870666503906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 358.94952392578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 358.9351501464844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 359.0210266113281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 358.9014892578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 358.828369140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 358.9306335449219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 358.9689025878906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 358.76934814453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 359.09539794921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 358.88153076171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 358.9133605957031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 359.1316833496094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 359.0564270019531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 359.03900146484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 359.0311279296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 359.06793212890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 359.1688537597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.70172119140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.948486328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 359.0362854003906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 359.0238342285156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 358.88897705078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 358.8521728515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 358.95257568359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 358.9888000488281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 359.05914306640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 359.0721130371094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 359.0091857910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 359.0447998046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 358.78765869140625
############ Running episode number: 759  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.9119567871094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 358.889404296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 358.5454406738281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 359.08740234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 358.86572265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 358.86419677734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 358.7991943359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 358.87005615234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 358.99615478515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 358.85284423828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 359.0062561035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 358.778564453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 359.03533935546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.9317932128906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 359.202880859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 359.03076171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 358.9950256347656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 359.0780029296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 358.8691711425781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 358.78387451171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 358.9495544433594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 358.7716369628906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 358.7861633300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 359.0828857421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 359.1488037109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 359.05096435546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 358.8351135253906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 358.8960266113281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 358.96087646484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 359.07757568359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 358.81146240234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 358.9425048828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 359.08551025390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 358.7409973144531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 358.930908203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 359.1466979980469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 359.0199890136719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.9683837890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 358.89251708984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 359.0276794433594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 359.0615539550781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 358.828857421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 359.0204772949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 359.16375732421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 358.8902282714844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 359.2239074707031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 358.9828186035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 359.0350036621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 358.98321533203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 359.0933837890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 358.7100830078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 359.0472717285156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 359.0464172363281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 359.0909118652344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 359.10699462890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 358.9881896972656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 358.98565673828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 359.1289978027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 358.9634094238281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 358.75152587890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 358.8691101074219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 359.2979431152344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 358.8601989746094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 359.0631408691406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 358.81854248046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 359.0780944824219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 359.1285705566406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 359.19696044921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 358.9314270019531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 358.83038330078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 359.0456237792969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 358.7740783691406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 359.0529479980469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 359.0872497558594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 358.78302001953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 358.8177185058594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 358.953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 359.0180358886719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 359.0641784667969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 358.9840087890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 359.03106689453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 359.0125732421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 358.84088134765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 358.92657470703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 359.11090087890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.7359924316406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 359.1064758300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 359.0553283691406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.91278076171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 358.88848876953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 359.12353515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 358.8587951660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 358.7816162109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 359.101318359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 358.9049987792969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 359.1023254394531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 359.1556396484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 358.81005859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 358.9923400878906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 359.07452392578125
############ Running episode number: 760  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.8308410644531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 358.9906005859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 359.13983154296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 359.2469787597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 358.9178161621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 358.88348388671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 359.0553894042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 358.93963623046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 359.1856689453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 358.96258544921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 359.01806640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 358.8299560546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 358.9969787597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.76251220703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 359.00555419921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 358.8982238769531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 358.86724853515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 359.0960998535156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 359.162353515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 358.89208984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 359.0321044921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 359.2276611328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 359.31573486328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 358.83648681640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 358.9967041015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 358.9093322753906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 358.82781982421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 358.9248046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 358.79931640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 358.93377685546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 358.8323059082031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 358.6732482910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 358.8092346191406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 359.0044250488281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 358.84344482421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 359.0584411621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 359.01263427734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.90966796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 359.2018127441406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 358.8718566894531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 358.9796142578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 358.8276062011719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 359.1234130859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 358.9139709472656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 359.0578918457031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 359.09625244140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 359.0899658203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 358.6849060058594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 358.98577880859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 359.0560607910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 358.90252685546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 358.97320556640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 358.9774475097656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 359.1104736328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 359.10211181640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 358.9263000488281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 358.6648254394531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 358.8857727050781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 359.09417724609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 358.7110900878906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 359.088134765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 358.9861145019531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 358.8496398925781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 358.7547607421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 359.0260314941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 359.1473693847656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 358.8115539550781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 358.6620178222656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 358.8843688964844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 358.9570617675781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 358.8729248046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 358.9541015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 359.1400146484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 358.9760437011719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 359.02813720703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 358.9444274902344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 358.8948059082031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 358.8937072753906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 359.09552001953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 358.8716125488281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 359.0108337402344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 358.7690734863281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 358.7731628417969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 358.905517578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 359.08795166015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 359.1951904296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 358.9082946777344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.83538818359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.99822998046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 358.9023742675781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 359.076416015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 359.0752868652344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 358.7795104980469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 359.05047607421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 359.0768737792969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 359.05780029296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 358.9604797363281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 359.0340576171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 358.9596862792969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 358.8688659667969
############ Running episode number: 761  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.71484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 358.7019958496094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 358.84539794921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 359.0318298339844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 358.8213806152344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 359.0162658691406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 359.13165283203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 358.8986511230469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 358.716796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 359.0313720703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 359.06671142578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 359.10894775390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 358.6015930175781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 359.0196228027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 359.0
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 358.8123779296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 359.1889953613281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 358.8632507324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 359.0375061035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 358.890380859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 359.01190185546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 358.7940979003906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 358.88104248046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 358.90264892578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 358.9656982421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 359.122314453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 359.07183837890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 359.0386047363281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 358.87274169921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 358.92303466796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 359.0236511230469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 358.9051208496094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 358.95806884765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 359.11822509765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 358.6155700683594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 358.9447021484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 358.8898620605469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 359.0650634765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 358.8711853027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 358.8193054199219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 359.0493469238281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 358.9210510253906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 358.8760986328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 358.9256591796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 358.9154357910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 358.883544921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 358.7666015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 359.0294189453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 359.13348388671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 358.9217834472656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 359.0504150390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 358.765869140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 358.89776611328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 358.90667724609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 358.88287353515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 358.98223876953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 358.9595947265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 358.72222900390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 359.0461730957031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 358.96112060546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 358.9471435546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 359.03759765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 359.04638671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 358.97442626953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 359.087890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 358.9884948730469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 359.0296630859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 359.0592041015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 359.10076904296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 358.7444763183594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 359.02874755859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 358.9328918457031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 358.94427490234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 359.0348205566406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 359.040283203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 358.8342590332031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 359.0905456542969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 358.8312683105469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.91748046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 358.9433288574219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 358.922607421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 359.02471923828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 358.7992858886719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 358.79034423828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 358.91485595703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.84454345703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 358.9748229980469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.987548828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.8398132324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 358.9893798828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 358.95391845703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 358.85986328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 358.9057922363281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 359.2298583984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 358.9549255371094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 359.12994384765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 358.8034973144531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 359.0227966308594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 358.681396484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 358.6170349121094
############ Running episode number: 762  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.85980224609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 359.0361328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 359.0138854980469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 358.868896484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 358.84698486328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 358.7828063964844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 358.9649963378906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 359.1454772949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 359.021484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 358.89117431640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 359.18988037109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 358.9463806152344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 358.8017578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.8313293457031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 359.1653747558594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 358.9812316894531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 358.8298645019531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 358.8738098144531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 358.9794006347656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 359.06939697265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 358.9243469238281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 359.0733947753906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 358.94427490234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 358.97216796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 358.9524841308594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 358.7436828613281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 359.0647277832031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 359.138916015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 358.6520690917969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 359.0027770996094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 358.9554748535156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 358.8177490234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 359.05059814453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 358.8934020996094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 359.03564453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 359.00048828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 359.0003356933594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 359.02874755859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 358.8713073730469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 358.9803771972656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 358.897705078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 359.1391906738281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 358.960693359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 359.0079040527344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 358.8306884765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 358.76544189453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 358.98419189453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 358.7869567871094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 359.01617431640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 359.2038269042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 358.9151306152344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 359.0338439941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 358.8471984863281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 359.0078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 358.86773681640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 358.95166015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 359.09783935546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 358.90521240234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 359.0711975097656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 358.88714599609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 358.90545654296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 359.0499267578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 358.85968017578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 359.0516052246094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 359.1317443847656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 359.0413513183594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 359.1261901855469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 358.763916015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 358.9710998535156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 358.9275207519531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 359.1448669433594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 358.75457763671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 358.89776611328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 358.9853820800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 359.1134948730469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 359.0214538574219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 358.9113464355469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 358.95928955078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.9698791503906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 358.9463195800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 359.0682678222656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 359.194091796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 359.0421142578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 358.87176513671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 358.9048156738281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 359.1142272949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 358.7371826171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.7563171386719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 359.0606689453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 358.794189453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 358.6947326660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 359.0035400390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 359.1988220214844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 358.95318603515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 359.0272521972656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 359.0269775390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 358.87786865234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 358.7411193847656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 358.8670959472656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 359.047119140625
############ Running episode number: 763  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.9068298339844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 359.0099792480469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 359.0076904296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 359.00848388671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 359.0280456542969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 358.899658203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 358.9197692871094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 358.93804931640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 359.0020446777344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 359.3273620605469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 358.9266357421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 358.9409484863281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 359.18511962890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.9393005371094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 358.81689453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 358.7995910644531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 358.9013671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 358.7750549316406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 358.8527526855469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 358.85107421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 359.1075134277344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 358.9580993652344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 358.9663391113281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 359.0071105957031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 359.10882568359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 358.953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 358.8397216796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 358.8169250488281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 358.95855712890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 358.8187255859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 358.80267333984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 358.99212646484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 358.8363037109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 359.042724609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 358.9661865234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 358.8502197265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 358.9704895019531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.8856201171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 359.0511474609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 358.9296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 359.0487976074219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 359.0602722167969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 359.0845947265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 358.7444763183594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 358.88287353515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 359.0268859863281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 358.939697265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 359.1136779785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 358.7811279296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 358.96783447265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 358.9533386230469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 359.01348876953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 359.1300964355469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 358.97796630859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 359.2666320800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 358.8725891113281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 358.73699951171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 359.04644775390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 358.93499755859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 358.8806457519531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 358.8562927246094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 358.95953369140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 358.9775695800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 359.20001220703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 358.7967529296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 359.0626525878906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 359.1915283203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 359.13800048828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 359.1014709472656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 358.7840881347656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 359.0615234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 359.2013854980469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 358.8807678222656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 359.0278625488281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 358.8751525878906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 359.022705078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 358.9134826660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 358.83416748046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 359.1745910644531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 359.0242919921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 358.8904113769531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 358.9103698730469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 358.9676818847656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 358.9676208496094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 358.9991455078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.9329833984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 358.947509765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.8078308105469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.8667907714844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 358.9875183105469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 359.13055419921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 358.8338928222656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 359.04217529296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 358.9525451660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 358.9541015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 359.1680603027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 359.0484313964844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 358.7795104980469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 358.9372253417969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 359.06927490234375
############ Running episode number: 764  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.9905700683594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 359.0970153808594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 359.2345886230469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 358.90045166015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 359.18072509765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 359.1009826660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 358.9102783203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 358.9725036621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 358.95013427734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 358.9550476074219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 359.00750732421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 358.6649475097656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 358.88494873046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.814697265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 358.92974853515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 359.352294921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 358.9950866699219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 358.8512878417969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 358.822021484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 358.9444580078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 358.9795227050781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 359.0349426269531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 359.1461181640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 358.9477844238281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 358.951416015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 359.0824279785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 358.9400634765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 359.06732177734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 358.90924072265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 359.1003112792969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 358.7966613769531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 358.9219055175781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 358.9066162109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 358.6065673828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 358.96954345703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 358.9220275878906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 358.79071044921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 359.025390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 358.9614562988281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 358.9526062011719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 358.93377685546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 358.9547424316406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 359.107177734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 359.04962158203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 358.77166748046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 358.9120788574219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 359.0311279296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 358.8326110839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 359.0674133300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 358.8581848144531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 358.95928955078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 359.0852355957031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 358.8526916503906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 358.99249267578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 358.94268798828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 359.0007019042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 358.9048156738281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 358.8783264160156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 359.0277099609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 358.54248046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 359.3066711425781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 358.9866027832031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 358.7668151855469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 358.99188232421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 359.0347595214844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 358.900390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 359.04254150390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 358.9165344238281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 358.9623718261719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 359.0249938964844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 358.91064453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 358.6846618652344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 358.9599304199219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 359.2115478515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 358.8918762207031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 359.073486328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 358.9136047363281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 358.87744140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.7244873046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 358.96441650390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 359.0872497558594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 358.98760986328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 358.9369201660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 359.0856018066406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 359.0829772949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.8128967285156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 358.8165283203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.955322265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 359.0400085449219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 359.00091552734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 358.7322692871094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 358.9332275390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 358.961669921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 358.9808044433594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 358.7815856933594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 358.88671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 358.8712158203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 359.0455017089844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 359.1411437988281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 358.7203674316406
############ Running episode number: 765  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.75860595703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 358.8102111816406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 359.1463928222656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 358.93475341796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 358.8944091796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 358.954345703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 358.8804016113281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 359.012451171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 358.9723815917969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 358.7348937988281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 358.919677734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 359.06915283203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 358.8202819824219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.8442077636719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 359.0066833496094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 358.84991455078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 358.8575744628906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 358.875732421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 358.94287109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 358.8214111328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 358.9569396972656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 358.6114196777344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 359.0430908203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 358.9485778808594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 358.91925048828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 359.0280456542969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 358.8310852050781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 358.9509582519531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 358.9208068847656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 358.8446044921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 359.0708312988281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 358.6680908203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 358.6207580566406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 359.0047912597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 358.76025390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 358.90167236328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 358.92669677734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.728515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 358.6166687011719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 358.8189392089844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 359.15673828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 359.2856140136719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 358.9178771972656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 358.9736022949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 359.0118103027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 358.906005859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 359.1959533691406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 358.85858154296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 358.876708984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 358.8276672363281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 358.797607421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 359.13299560546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 359.16448974609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 359.0362243652344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 358.9960021972656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 358.9280090332031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 358.9294738769531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 359.05352783203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 359.20953369140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 358.9987487792969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 358.85009765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 358.97723388671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 359.0191955566406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 358.9415588378906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 359.0560607910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 358.90228271484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 358.8201904296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 359.0252380371094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 358.8897705078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 358.72467041015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 359.08441162109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 359.02886962890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 359.1559753417969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 358.95098876953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 359.24676513671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 358.7162170410156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 358.9178466796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 358.9528503417969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.7247009277344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 358.9202575683594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 359.13653564453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 359.0061340332031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 358.967529296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 358.9459228515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 359.07159423828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.8723449707031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 358.8504943847656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.910888671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.8143005371094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 358.7496337890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 359.0941162109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 358.9372863769531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 359.1683044433594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 358.8760681152344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 358.98394775390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 359.0064697265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 358.8872985839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 358.83941650390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 359.0500793457031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 358.8466491699219
############ Running episode number: 766  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 359.1336364746094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 358.70196533203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 358.91351318359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 358.7845458984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 359.08758544921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 359.0295104980469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 359.0189208984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 358.9293518066406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 359.0445861816406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 359.106689453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 358.716796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 358.9558410644531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 358.9756774902344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.83270263671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 359.0058288574219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 359.22808837890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 358.8741455078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 358.9656982421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 359.11212158203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 358.98809814453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 359.0016784667969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 359.1536865234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 359.0424499511719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 359.0048522949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 358.8946533203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 358.97314453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 359.0719909667969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 359.0610656738281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 358.8240966796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 359.07763671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 358.958984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 358.8580627441406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 358.9408264160156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 358.8376770019531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 358.7037353515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 358.8625183105469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 359.2156982421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.8826904296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 358.729736328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 359.0316162109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 359.0946350097656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 359.3192443847656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 358.9552917480469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 358.8397216796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 359.07086181640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 359.0374755859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 359.0185546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 358.84783935546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 358.9178161621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 358.9792175292969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 358.94921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 359.0306091308594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 359.1324157714844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 358.7457275390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 358.7161865234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 358.89410400390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 358.8950500488281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 359.1219177246094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 358.88604736328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 358.7145690917969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 358.839111328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 359.00225830078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 358.8743591308594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 358.90142822265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 359.04632568359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 358.8130187988281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 358.8952941894531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 358.8272705078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 358.9192810058594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 358.82916259765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 358.9456481933594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 358.8586730957031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 359.0338134765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 358.7880554199219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 359.09906005859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 359.1236267089844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 359.00079345703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 359.1280212402344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.9645690917969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 358.8390808105469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 358.9447326660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 358.5634765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 359.00042724609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 358.8484191894531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 358.7972106933594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.953369140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 358.8913879394531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.6845703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.8184814453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 359.0360412597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 358.8984680175781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 358.8059997558594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 358.9688720703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 359.051025390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 358.88983154296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 359.0166320800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 358.8725280761719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 358.93310546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 358.94281005859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 359.0294494628906
############ Running episode number: 767  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 359.1448059082031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 359.17315673828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 359.0281677246094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 358.763916015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 358.9992980957031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 358.8263244628906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 358.9848327636719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 358.95306396484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 358.8434753417969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 359.1116638183594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 358.8765869140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 358.8166198730469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 358.82574462890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.8403015136719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 358.9184875488281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 358.90252685546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 358.8512268066406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 358.8465881347656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 358.9335632324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 358.69781494140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 358.9538269042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 358.6927185058594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 358.8618469238281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 358.8665466308594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 359.16217041015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 358.9103698730469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 358.9009704589844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 358.986328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 358.9907531738281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 358.8437194824219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 359.12298583984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 358.9535217285156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 358.94561767578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 358.9876708984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 358.990966796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 358.7762451171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 359.03570556640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.9610900878906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 358.763427734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 358.9582214355469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 359.0619812011719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 358.9878845214844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 358.849609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 358.7789306640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 358.8796081542969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 359.1744384765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 358.9554443359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 359.0683898925781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 358.801025390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 358.8874816894531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 358.94976806640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 359.0196228027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 359.0281066894531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 359.1049499511719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 358.9371337890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 358.93731689453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 358.8713073730469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 358.7797546386719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 358.9808044433594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 358.7886962890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 359.1039733886719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 358.8240661621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 359.0684509277344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 358.8908996582031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 358.9971618652344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 359.1991271972656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 359.0062561035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 358.7633056640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 358.8124084472656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 358.8600769042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 358.9358215332031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 358.8728332519531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 359.1828918457031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 358.7686462402344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 358.82366943359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 359.0982971191406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 359.1316833496094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 359.0743103027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.96209716796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 358.9760437011719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 358.9625549316406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 359.00103759765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 359.2377014160156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 358.97845458984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 358.98895263671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.94989013671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 359.0545959472656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.82373046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.97015380859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 358.9339904785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 359.1366271972656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 358.93682861328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 358.9458923339844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 358.9060363769531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 358.88006591796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 358.7356872558594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 358.7900390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 358.9534606933594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 358.8236083984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 359.1129455566406
############ Running episode number: 768  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.95892333984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 358.9671325683594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 358.91497802734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 359.095703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 358.9088439941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 358.92889404296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 359.0550231933594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 358.89666748046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 359.0002136230469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 359.0382385253906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 359.0382080078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 359.09478759765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 358.9959716796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.9822082519531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 358.85748291015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 359.18438720703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 358.8811950683594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 358.79998779296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 359.0662841796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 358.9569396972656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 358.86676025390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 358.90679931640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 358.96380615234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 358.9026794433594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 358.85113525390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 359.11456298828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 358.99224853515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 358.90325927734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 358.9954528808594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 359.04986572265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 358.8814697265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 358.84039306640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 359.1727294921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 358.95184326171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 359.21282958984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 358.8417663574219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 359.12054443359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 359.1609802246094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 358.9139404296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 358.9258117675781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 359.0425109863281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 359.13006591796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 359.0458068847656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 359.087890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 358.8658447265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 358.8441467285156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 358.84429931640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 359.0101013183594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 359.0030517578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 358.9156188964844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 358.9564514160156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 358.7157897949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 359.23272705078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 358.9492492675781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 358.9107360839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 358.9335021972656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 359.0467224121094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 358.6402893066406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 358.9813537597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 359.0093994140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 358.85076904296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 359.0270690917969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 358.7643737792969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 359.0155029296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 359.07952880859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 359.1544494628906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 359.0361022949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 358.87860107421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 358.9234619140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 358.94329833984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 358.9352722167969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 358.849609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 359.0818786621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 359.09814453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 359.1589050292969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 358.7672424316406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 358.8418273925781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 359.0667419433594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.97967529296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 359.0423889160156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 359.1278991699219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 359.1599426269531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 359.06927490234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 358.94744873046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 358.9424133300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 359.09832763671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 358.9006652832031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.7194519042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.9807434082031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 359.0252380371094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 358.9628601074219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 358.752685546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 358.9532470703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 359.2120666503906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 358.9365539550781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 358.8294982910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 359.12872314453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 358.9732360839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 358.9440002441406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 358.9520263671875
############ Running episode number: 769  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 359.0567321777344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 358.8465270996094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 359.0198974609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 358.92218017578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 358.814453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 359.00506591796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 358.9176330566406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 359.0364990234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 358.9538879394531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 358.9233093261719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 358.8291320800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 358.98626708984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 359.1040344238281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 359.07989501953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 359.1250305175781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 359.05029296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 359.07080078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 359.0403137207031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 359.0697021484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 358.738037109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 359.06671142578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 358.9710693359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 359.1473083496094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 358.9270935058594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 359.04705810546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 358.91802978515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 359.141845703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 359.08087158203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 358.8125915527344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 358.97955322265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 358.7388610839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 359.1413269042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 359.0262145996094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 358.77239990234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 358.77716064453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 359.01446533203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 358.7158508300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.9195556640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 359.1016540527344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 358.6582946777344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 358.8583068847656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 358.78863525390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 358.7991943359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 359.0146484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 359.1231994628906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 358.98980712890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 359.0677185058594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 358.8292236328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 359.1288757324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 359.0263366699219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 359.01519775390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 359.0100402832031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 358.8731384277344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 359.0378112792969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 359.0914001464844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 358.9722900390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 358.7605895996094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 359.11566162109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 358.9648742675781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 358.87860107421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 358.9317321777344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 359.0460510253906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 359.05963134765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 358.9711608886719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 359.0918884277344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 359.1160583496094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 359.06732177734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 358.9606018066406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 358.9985656738281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 358.9891662597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 358.956787109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 359.03558349609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 358.8287658691406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 359.0283508300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 358.97637939453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 359.0373840332031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 359.0614318847656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 359.1155090332031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 359.3629455566406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 358.6958312988281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 359.1532287597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 359.0577087402344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 359.0230712890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 359.0206604003906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 358.96112060546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.77032470703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 358.8692932128906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.9123229980469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.908203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 359.0079345703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 359.0643615722656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 359.0912780761719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 358.9441223144531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 358.751708984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 359.15570068359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 359.10626220703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 358.8378601074219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 359.1556701660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 359.05938720703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 358.99346923828125
############ Running episode number: 770  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.8675537109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 359.0243225097656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 358.7923278808594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 358.78759765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 359.0345153808594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 358.9572448730469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 359.0899963378906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 358.83404541015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 359.2666931152344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 358.8038330078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 358.8208923339844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 359.043212890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 359.1520690917969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.6413269042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 359.00653076171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 359.086181640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 359.0731201171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 358.9984436035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 359.0245666503906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 359.056884765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 359.1001281738281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 359.05865478515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 358.9433898925781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 359.01934814453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 358.7950134277344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 358.8744812011719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 358.88800048828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 359.00341796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 359.05584716796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 358.98846435546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 359.0339660644531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 359.0342712402344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 358.6887512207031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 358.7926940917969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 358.7479553222656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 358.8001708984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 358.822265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.92431640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 358.9548645019531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 358.6861572265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 359.1034240722656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 358.9141540527344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 358.9602966308594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 358.7612609863281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 358.9732360839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 358.9407653808594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 359.1531982421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 358.97027587890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 359.01483154296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 359.1579895019531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 358.90643310546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 359.01654052734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 358.9528503417969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 358.848388671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 359.0874938964844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 358.93157958984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 358.92315673828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 358.7931823730469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 358.7265930175781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 358.8945007324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 358.9583740234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 358.8296813964844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 359.098388671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 358.96295166015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 358.9736022949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 358.8802795410156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 358.5216064453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 358.7571716308594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 358.92132568359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 358.96417236328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 358.9022216796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 358.8039855957031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 358.83746337890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 358.8329772949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 358.9553527832031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 359.0157165527344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 358.9278259277344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 358.8160400390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.8959045410156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 358.8433532714844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 358.9420166015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 358.9007568359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 358.9085693359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 358.7674560546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 359.0127868652344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 359.2066345214844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 359.0464782714844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.95074462890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 359.045654296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 359.1667175292969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 358.9828796386719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 359.0843505859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 358.70556640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 358.9179992675781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 359.0920715332031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 359.0804748535156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 358.7947082519531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 359.140869140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 358.9924621582031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 359.0242919921875
############ Running episode number: 771  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 359.0234069824219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 359.0685729980469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 358.845458984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 358.85662841796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 359.00909423828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 359.01904296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 359.08709716796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 358.8912353515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 359.021484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 358.9967346191406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 358.91912841796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 358.7631530761719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 359.1474914550781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.95538330078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 359.00677490234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 358.91595458984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 358.8453674316406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 358.935791015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 358.9306945800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 358.9637145996094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 358.9110107421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 358.87115478515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 358.66259765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 358.9389953613281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 358.9007263183594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 359.0467529296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 358.8826599121094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 358.9582214355469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 359.0148010253906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 359.10333251953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 358.99908447265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 359.0605773925781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 359.0407409667969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 359.0647277832031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 358.9503479003906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 358.88262939453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 359.1522521972656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.8507080078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 358.7704772949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 359.1121826171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 359.10040283203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 359.07916259765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 358.8795471191406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 358.8595886230469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 358.7524719238281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 359.0635986328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 359.0757751464844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 358.89056396484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 358.8714294433594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 358.87335205078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 358.9393310546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 358.8778076171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 359.092041015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 358.92950439453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 359.0697937011719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 359.0680847167969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 358.8390808105469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 358.8370056152344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 359.0041809082031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 358.7511901855469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 358.9461669921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 358.968994140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 358.99688720703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 358.7758483886719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 358.8818054199219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 359.0152282714844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 358.62371826171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 359.10791015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 358.93609619140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 359.0154113769531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 358.9491271972656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 359.2868347167969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 358.8892822265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 358.9617004394531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 359.07940673828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 358.8125305175781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 358.93560791015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 359.06280517578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 359.0162048339844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 359.1968688964844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 358.9604187011719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 359.2123107910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 358.7726745605469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 358.9096374511719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 358.8676452636719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 359.1313781738281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 358.8887023925781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.9549255371094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.9454650878906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 359.062255859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 358.79766845703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 358.9213562011719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 359.07208251953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 358.47802734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 359.06005859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 358.85589599609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 358.90557861328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 358.9302673339844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 358.93017578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 358.8395690917969
############ Running episode number: 772  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.99951171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 358.9610595703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 358.9254150390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 359.0231628417969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 358.86883544921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 358.8830261230469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 358.83856201171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 358.71563720703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 358.9204406738281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 358.84967041015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 358.7671203613281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 358.98175048828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 358.90386962890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.64794921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 358.9307861328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 359.2133483886719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 359.0594177246094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 358.9868469238281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 358.8913269042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 358.9296569824219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 358.8065490722656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 359.0794372558594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 359.2060546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 359.09515380859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 358.9476013183594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 358.60443115234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 358.82525634765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 359.0783996582031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 359.07855224609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 358.90777587890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 359.1596984863281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 358.66522216796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 358.8758239746094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 359.06988525390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 359.03399658203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 358.9104309082031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 359.1402893066406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.8416748046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 358.8636779785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 359.02947998046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 358.9854736328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 358.962646484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 358.93975830078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 358.82769775390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 358.8922119140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 358.8987731933594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 359.07708740234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 358.99041748046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 358.79718017578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 358.7827453613281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 358.75372314453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 359.0008850097656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 359.0238037109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 358.974365234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 358.92596435546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 358.8337707519531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 358.99169921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 359.0053405761719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 358.6468200683594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 359.08282470703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 358.9878234863281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 358.93023681640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 359.1070251464844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 358.7941589355469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 358.94659423828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 358.9733581542969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 358.7065734863281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 359.1186218261719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 358.81890869140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 358.79400634765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 358.9074401855469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 358.8715515136719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 358.9833068847656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 358.9585266113281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 358.9057922363281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 359.05267333984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 358.9878845214844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 359.1667785644531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 359.25238037109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 358.9952697753906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 358.9974060058594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 359.02679443359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 359.1397399902344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 358.87713623046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 358.98272705078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.8784484863281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 358.755615234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 359.1632385253906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.9169616699219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 359.0006408691406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 358.8834228515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 359.0450439453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 358.5920104980469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 358.8230285644531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 358.9363098144531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 358.96856689453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 358.86883544921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 358.9959716796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 358.68804931640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 358.9547119140625
############ Running episode number: 773  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.8244934082031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 358.8041076660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 358.9478454589844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 358.93560791015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 358.9145202636719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 358.88067626953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 359.19573974609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 358.8769836425781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 359.0802307128906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 359.1247863769531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 358.89801025390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 358.8336486816406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 359.00067138671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.9879455566406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 358.8954772949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 359.1006774902344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 358.8569641113281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 359.0738525390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 358.9819641113281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 358.9923400878906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 358.9908752441406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 358.7729187011719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 359.2115173339844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 358.9981689453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 358.84381103515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 358.8940124511719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 358.9937438964844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 359.17919921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 359.1206970214844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 358.8419494628906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 358.8363037109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 358.86480712890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 358.9751281738281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 359.0798645019531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 358.94476318359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 358.7752380371094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 358.7731018066406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.9441223144531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 358.7080383300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 358.9835510253906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 358.90972900390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 358.821533203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 358.6653137207031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 359.0275573730469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 358.9861145019531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 358.7879943847656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 358.9665222167969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 358.9821472167969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 358.8728332519531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 358.8741149902344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 358.8001708984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 359.02459716796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 358.8533630371094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 358.9604797363281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 358.9866638183594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 358.8631286621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 358.7513122558594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 358.8680419921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 358.9624938964844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 359.0776062011719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 358.84124755859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 358.94256591796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 359.04779052734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 359.0697937011719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 358.9566345214844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 358.9584655761719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 358.9213562011719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 358.9801330566406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 359.1287841796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 359.04132080078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 359.0311584472656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 358.9480285644531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 359.06494140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 359.0161437988281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 358.90179443359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 359.20257568359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 359.0503845214844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 358.86346435546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.62835693359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 359.119873046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 358.9117126464844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 358.8987121582031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 358.8419189453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 358.9830322265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 358.7670593261719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.9253845214844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 359.0254211425781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.9348449707031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.888916015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 358.93792724609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 358.8470153808594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 358.923828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 358.87506103515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 358.9957275390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 358.70513916015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 359.16363525390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 359.0392761230469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 359.1381530761719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 358.70751953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 358.9742736816406
############ Running episode number: 774  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 359.0135803222656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 358.8576965332031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 358.96697998046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 358.79852294921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 358.8883972167969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 359.1048278808594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 358.8392028808594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 359.0994873046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 359.0301208496094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 358.9392395019531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 358.9064025878906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 359.03997802734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 358.78607177734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.9470520019531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 358.94091796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 358.9073181152344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 359.10906982421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 358.7335205078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 359.25311279296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 358.9311218261719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 359.03350830078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 358.7729187011719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 358.8701171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 359.01788330078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 358.8702697753906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 358.963623046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 358.8993835449219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 358.7132263183594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 358.9352722167969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 358.99810791015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 358.9145202636719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 358.9581604003906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 358.86016845703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 358.912353515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 359.00238037109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 359.0722351074219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 359.03070068359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.9710998535156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 358.7609558105469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 359.05706787109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 359.03985595703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 358.7885437011719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 358.8562316894531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 358.62713623046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 358.925537109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 358.960693359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 358.9285888671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 358.8865051269531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 358.717529296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 359.0860290527344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 358.9360046386719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 358.9408264160156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 358.9668273925781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 359.10345458984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 359.15478515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 359.21435546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 358.9161071777344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 358.9085388183594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 358.9343566894531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 358.78875732421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 359.1352233886719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 359.096923828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 359.03607177734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 358.9750061035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 358.9640197753906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 359.1120300292969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 358.87591552734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 358.5965270996094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 358.8700866699219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 359.011962890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 359.1326599121094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 359.1050720214844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 358.657470703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 358.8700256347656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 358.82781982421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 358.944580078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 358.9422302246094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 358.9769592285156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 359.0603942871094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 358.9837646484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 358.8179016113281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 358.88189697265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 358.9453430175781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 359.0126037597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 359.0738525390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 359.0142822265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 358.8482971191406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.7958068847656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.8588562011719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 359.05352783203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 358.84893798828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 358.7823486328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 359.1297912597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 358.8738708496094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 359.1280212402344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 359.0259704589844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 358.8398132324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 359.0673522949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 359.017822265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 358.9079895019531
############ Running episode number: 775  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.818359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 358.94873046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 358.93328857421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 358.8924560546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 359.1297912597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 358.8483581542969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 358.981201171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 358.7933349609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 358.9605407714844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 359.0502624511719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 359.1205749511719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 358.7948913574219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 358.93621826171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 359.0052490234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 358.953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 358.993408203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 359.11834716796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 358.8448486328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 359.00701904296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 359.0782165527344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 358.9309387207031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 358.9036560058594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 358.8440246582031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 358.8692626953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 359.2126770019531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 359.0405578613281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 359.0313415527344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 359.05096435546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 358.8842468261719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 359.00787353515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 359.034423828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 358.9684143066406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 359.093017578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 358.81591796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 358.8066101074219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 358.7327880859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 358.80657958984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 359.031005859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 358.9567565917969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 359.0233154296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 359.0081481933594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 358.97479248046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 359.0126037597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 359.1192321777344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 359.0404968261719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 358.9090270996094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 359.035888671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 358.98822021484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 358.97607421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 358.975341796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 358.9004821777344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 359.099365234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 359.158935546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 359.03131103515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 358.9111022949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 358.8557434082031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 358.896484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 359.00634765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 359.1671447753906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 358.91424560546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 358.9178466796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 359.1304931640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 358.9901428222656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 358.6488342285156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 359.0135803222656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 358.74261474609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 358.8934326171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 358.9432373046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 359.1092224121094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 358.8492431640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 358.87640380859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 358.84796142578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 358.9927062988281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 358.73333740234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 359.0184631347656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 359.2341613769531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 358.8733215332031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 359.1234130859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 359.1439208984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 359.0133361816406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 359.0617980957031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 359.1466369628906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 358.9779052734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 359.0719909667969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 359.0804443359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 359.0959167480469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 358.90557861328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.9872131347656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.9323425292969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 359.1268310546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 359.0223693847656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 358.9103698730469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 358.9478454589844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 359.2361755371094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 358.79425048828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 359.12677001953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 358.9465026855469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 358.8578796386719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 358.76031494140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 358.96173095703125
############ Running episode number: 776  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.8576354980469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 359.2306213378906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 358.8480224609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 359.0074462890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 358.77996826171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 358.81524658203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 358.790771484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 358.8476867675781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 358.9087219238281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 358.957763671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 358.8620910644531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 358.8316955566406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 358.9096984863281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 359.015380859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 359.156494140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 358.7854919433594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 359.11016845703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 358.7338562011719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 359.0066223144531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 359.0480651855469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 358.98370361328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 359.05804443359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 358.8753356933594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 359.0135498046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 358.9199523925781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 358.95172119140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 358.9732360839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 358.80157470703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 359.1098937988281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 359.0223693847656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 359.0669250488281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 358.7724609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 359.22174072265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 358.9541931152344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 359.0432434082031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 358.98089599609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 359.0803527832031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.9559631347656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 358.96099853515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 358.9654235839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 358.94525146484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 359.0707092285156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 359.1266174316406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 359.0281982421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 358.66900634765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 358.8371887207031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 358.80499267578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 359.0331115722656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 358.856201171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 358.9842224121094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 358.8022155761719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 358.9069519042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 358.98040771484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 358.8905334472656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 359.0087890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 359.1777038574219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 358.7294006347656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 359.09930419921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 358.8372497558594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 358.904541015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 358.8292236328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 358.77166748046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 358.9211730957031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 358.8549499511719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 358.8822326660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 358.7975158691406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 358.92266845703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 358.8892822265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 359.0704650878906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 359.0096130371094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 359.1493225097656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 359.1231994628906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 358.8586120605469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 359.0279846191406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 359.1035461425781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 359.027587890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 358.9340515136719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 359.0586853027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.6962585449219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 358.82476806640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 358.99090576171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 358.83038330078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 358.8365783691406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 358.8965148925781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 359.0899963378906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 359.1658935546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 358.990234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 359.01849365234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.9561462402344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 358.8666076660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 359.0951232910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 358.87994384765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 358.92138671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 358.7190856933594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 359.00933837890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 359.0533752441406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 358.9823303222656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 359.09375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 359.03643798828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 358.8685302734375
############ Running episode number: 777  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.7818603515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 358.7877502441406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 359.033447265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 358.8235168457031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 358.8738708496094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 358.99151611328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 359.0391845703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 359.0738525390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 359.1085205078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 359.01116943359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 359.13800048828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 359.1126403808594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 359.0931091308594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.8209533691406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 358.94140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 358.9439392089844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 359.03997802734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 358.7086486816406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 359.0870666503906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 359.0582275390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 359.1297912597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 359.0152282714844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 358.848876953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 358.8565673828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 358.9155578613281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 358.98193359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 359.16839599609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 358.9046325683594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 359.03106689453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 359.0997009277344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 358.96331787109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 358.75421142578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 358.99993896484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 359.0118408203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 359.2878112792969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 359.135009765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 358.9863586425781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 359.1229553222656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 358.9476623535156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 358.7393798828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 359.0806884765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 358.94818115234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 359.1758117675781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 359.1993713378906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 359.01068115234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 359.1077575683594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 358.9119567871094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 359.1122741699219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 358.61236572265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 359.1208801269531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 359.0234069824219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 359.0321350097656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 358.9046936035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 359.12518310546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 359.0251159667969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 358.8726806640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 358.8153991699219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 358.9678955078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 358.9775390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 359.07501220703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 359.0543518066406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 359.2164306640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 358.919921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 359.0208740234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 359.0076599121094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 358.96929931640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 359.0811767578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 358.9032897949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 358.89361572265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 358.8403625488281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 358.77520751953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 359.2101135253906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 359.0269775390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 359.0502624511719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 358.7598876953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 358.7953796386719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 358.8953857421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 358.89788818359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 359.0121765136719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 358.9077453613281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 358.6937561035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 358.76666259765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 358.8959045410156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 358.94842529296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 358.9419250488281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.7328186035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 359.07830810546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 359.0038757324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.79998779296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 359.0979919433594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 358.9651184082031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 359.08392333984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 358.8554382324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 358.8684387207031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 358.94891357421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 359.0547790527344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 358.7046203613281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 358.97283935546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 359.0393371582031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 358.9912414550781
############ Running episode number: 778  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.883544921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 358.89813232421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 358.9100646972656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 359.07080078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 359.08148193359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 358.82269287109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 359.0065612792969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 359.301513671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 359.1165466308594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 359.0946960449219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 359.09820556640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 359.1196594238281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 358.9486389160156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 359.167236328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 359.18304443359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 358.83367919921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 359.0249328613281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 358.8600158691406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 358.86285400390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 359.06976318359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 358.7949523925781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 359.1004943847656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 358.8421325683594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 358.91485595703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 358.8577880859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 359.0343322753906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 359.1843566894531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 359.0182189941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 359.0638427734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 359.0461120605469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 358.7671203613281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 358.89337158203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 359.0345764160156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 359.13623046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 358.8829345703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 358.83123779296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 358.8981628417969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.8865966796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 358.91864013671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 359.04302978515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 359.03778076171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 358.9070129394531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 358.96826171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 358.9874267578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 358.87139892578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 359.07623291015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 358.9890441894531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 358.991943359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 358.9656677246094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 358.82257080078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 359.0397033691406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 358.88238525390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 359.093994140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 359.031494140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 359.0265808105469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 358.85357666015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 359.0370178222656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 359.137451171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 359.0788879394531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 359.03662109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 358.7875061035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 359.08984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 359.1111755371094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 358.8577880859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 359.1123352050781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 358.78717041015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 359.04144287109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 359.0095520019531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 359.0148010253906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 359.1215515136719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 359.08087158203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 359.1248474121094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 359.0777893066406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 358.94769287109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 358.9132385253906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 358.517822265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 358.8179626464844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 358.9697570800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 359.0317687988281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 359.1432800292969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 358.6025085449219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 358.8508605957031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 359.0903015136719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 358.8958435058594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 359.1474304199219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.81927490234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 358.8681945800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 359.00250244140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.9085998535156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 359.1077575683594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 359.06951904296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 358.9873352050781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 359.1844787597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 358.90325927734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 358.8713684082031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 359.0409851074219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 358.9142150878906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 359.0690612792969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 359.071044921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 359.0041809082031
############ Running episode number: 779  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.8744201660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 358.9002990722656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 359.059814453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 359.04156494140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 358.969482421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 358.9984436035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 358.7762756347656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 359.1420593261719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 358.78778076171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 358.9933166503906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 358.9955749511719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 359.1309509277344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 359.0988464355469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 359.03790283203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 358.83062744140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 359.1857604980469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 359.08599853515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 358.9147033691406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 358.9095458984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 358.9560241699219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 358.9752502441406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 358.72491455078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 359.0528564453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 358.8688659667969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 358.9805603027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 358.91094970703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 359.0870666503906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 358.8500671386719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 358.8636169433594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 358.9492492675781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 359.02032470703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 359.0401306152344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 358.9843444824219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 359.07080078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 358.9552917480469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 359.1800231933594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 359.15350341796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 359.01092529296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 358.91973876953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 359.0713806152344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 358.8988952636719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 359.1091613769531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 358.9609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 358.99530029296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 359.0064392089844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 358.9639587402344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 358.7284851074219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 358.707763671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 358.956298828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 359.0927734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 358.8921203613281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 359.013427734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 358.72991943359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 358.8840637207031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 358.8686218261719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 358.72613525390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 358.9776916503906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 358.98773193359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 358.69921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 358.9584655761719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 359.0109558105469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 358.8375549316406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 359.0574035644531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 358.990966796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 359.11846923828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 359.0093078613281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 359.1000061035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 359.0771789550781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 359.16790771484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 358.8113708496094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 359.12847900390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 358.98895263671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 359.1241149902344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 359.1449890136719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 358.82379150390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 358.8027648925781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 358.9957580566406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 359.05169677734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 359.02960205078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 358.91064453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 358.84722900390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 358.8245544433594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 358.75958251953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 358.8911437988281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 359.1131896972656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.9180908203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 359.0773620605469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.937255859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.7640075683594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 358.8793029785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 359.1108703613281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 359.106201171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 359.0074462890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 358.8009948730469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 358.7884521484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 359.0318908691406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 358.85406494140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 358.8534240722656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 358.9106140136719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 358.80670166015625
############ Running episode number: 780  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.9126892089844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 358.8751525878906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 358.95172119140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 358.95428466796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 358.70245361328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 358.8769836425781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 359.056640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 359.002197265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 359.2123718261719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 359.0673522949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 358.7790832519531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 358.99261474609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 358.9226989746094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.889892578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 359.2243957519531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 359.0797424316406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 358.87322998046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 359.2088623046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 359.0617980957031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 359.0513916015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 358.7667541503906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 358.6590881347656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 359.0525817871094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 359.1683654785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 358.9117431640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 358.9405517578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 359.0415954589844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 359.0472412109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 358.7804260253906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 358.84295654296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 358.9737854003906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 358.9530334472656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 358.70611572265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 359.0000305175781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 358.94525146484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 358.7999267578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 359.1072082519531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.8601989746094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 358.9068298339844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 358.914794921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 358.9107971191406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 359.0292663574219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 358.9097595214844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 359.093994140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 359.1150207519531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 358.74334716796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 359.0012512207031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 359.078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 359.0246887207031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 358.5830383300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 359.0486755371094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 358.9927978515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 358.880859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 358.82379150390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 359.0030517578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 358.98773193359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 358.8863220214844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 358.86419677734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 358.87451171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 359.0346374511719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 359.03436279296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 358.8158264160156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 358.9405517578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 358.8418273925781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 359.0586242675781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 359.03955078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 359.1658020019531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 359.0106506347656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 359.0180969238281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 359.05950927734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 359.06982421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 358.90093994140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 358.9441223144531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 358.9613037109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 358.7442932128906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 359.0534362792969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 358.8330993652344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 358.9516906738281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.83416748046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 358.6850280761719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 359.0214538574219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 359.02978515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 359.02386474609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 359.008544921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 358.9561462402344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.7966613769531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 358.9777526855469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.92547607421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.970458984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 358.75189208984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 359.13616943359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 358.68560791015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 358.86859130859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 358.94659423828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 358.99285888671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 359.09564208984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 359.0621643066406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 358.9071960449219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 358.8965148925781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 359.071044921875
############ Running episode number: 781  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.6803894042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 358.9976501464844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 359.2772521972656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 358.89495849609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 358.7596740722656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 358.8406982421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 358.83331298828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 359.09808349609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 358.8125305175781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 358.9444580078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 358.7314758300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 358.7875671386719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 359.21478271484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.9171447753906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 358.9954833984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 359.0549621582031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 358.8313903808594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 359.13812255859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 358.8189697265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 358.8318176269531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 359.19854736328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 359.2618103027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 358.961181640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 358.8638610839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 358.993408203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 359.0789489746094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 359.1911315917969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 359.1566162109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 358.7631530761719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 359.048828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 359.1459045410156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 358.97821044921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 359.02899169921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 358.80780029296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 359.2249755859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 358.8427429199219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 359.1055603027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.938232421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 359.09771728515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 358.8800048828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 358.97369384765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 358.8939514160156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 358.8248596191406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 358.8589172363281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 359.0845947265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 359.09210205078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 358.8550109863281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 358.9835205078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 358.90484619140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 358.9889831542969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 358.8880920410156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 358.9963684082031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 359.00164794921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 358.93304443359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 359.13995361328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 358.7368469238281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 358.82037353515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 359.0072937011719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 358.85382080078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 358.87835693359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 359.09075927734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 358.91180419921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 359.0237731933594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 358.9794921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 358.9699401855469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 358.9951171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 358.98388671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 358.85858154296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 358.83380126953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 359.00445556640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 358.802978515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 359.0574035644531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 359.0838623046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 358.9324951171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 359.0409240722656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 358.99041748046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 359.0981750488281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 359.20172119140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 359.1284484863281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 358.8857727050781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 358.90203857421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 359.0232849121094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 358.79736328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 358.7977600097656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 358.7373962402344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 359.32501220703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 359.12432861328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.94744873046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.8861999511719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 358.69232177734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 358.9028015136719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 358.81781005859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 358.7833251953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 358.7766418457031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 358.8165588378906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 358.82073974609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 358.94146728515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 358.99822998046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 358.80072021484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 358.9298400878906
############ Running episode number: 782  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.8872375488281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 358.8511657714844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 358.9894714355469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 359.06298828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 358.8846130371094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 358.7641906738281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 359.0533142089844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 359.0064697265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 358.8540344238281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 358.92474365234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 359.0550842285156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 358.958984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 358.9637145996094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 359.060302734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 358.9507751464844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 359.0942687988281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 358.97412109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 359.0977478027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 359.09210205078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 358.7533264160156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 358.88275146484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 358.97161865234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 358.89678955078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 359.0121154785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 358.99560546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 358.9482421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 358.9532470703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 358.84796142578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 359.0462646484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 359.15740966796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 358.9525451660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 359.0329284667969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 358.7822570800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 358.69537353515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 358.8909606933594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 359.0044860839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 359.0689697265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.99273681640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 359.0456848144531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 358.8868408203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 358.9049072265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 358.9521789550781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 358.99200439453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 359.0464782714844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 358.9344482421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 358.8492736816406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 358.9980163574219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 359.0198974609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 358.80517578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 359.1762390136719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 359.0000915527344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 358.95025634765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 358.7007141113281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 358.87567138671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 358.97723388671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 358.91888427734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 359.0441589355469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 358.8922119140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 359.0065002441406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 358.9432067871094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 359.0239562988281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 359.0865783691406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 359.205322265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 358.91802978515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 358.9455871582031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 358.85601806640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 359.0216979980469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 358.9798583984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 358.7928771972656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 359.0306396484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 359.0057373046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 358.9207763671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 358.9527282714844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 358.79144287109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 358.960205078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 359.1458435058594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 359.1119079589844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 359.15362548828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.8075256347656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 359.1463317871094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 359.0369873046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 358.78521728515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 358.8924560546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 358.83740234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 358.9292297363281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.720947265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 359.0575256347656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 359.1453552246094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 359.1888732910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 358.8788757324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 358.9198913574219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 358.8453063964844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 359.13555908203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 359.0967102050781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 358.7529602050781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 358.8647766113281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 358.956787109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 359.12994384765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 358.74951171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 358.96466064453125
############ Running episode number: 783  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.6965026855469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 359.0237121582031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 359.1790466308594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 358.8783874511719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 359.3813781738281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 358.85565185546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 358.99993896484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 358.8736267089844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 359.0428161621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 358.95465087890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 359.19085693359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 358.9991760253906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 359.0856628417969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 359.15167236328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 358.9839782714844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 358.9714660644531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 359.2086486816406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 359.0451965332031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 359.1678771972656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 358.9178466796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 358.83978271484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 358.8788757324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 358.9589538574219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 359.0276184082031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 359.063720703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 359.040283203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 358.96270751953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 358.8643493652344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 359.0976257324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 358.7535400390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 358.9480285644531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 358.79510498046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 358.85662841796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 359.035400390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 358.95147705078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 358.89947509765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 359.02764892578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 359.16845703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 358.8131103515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 358.7547302246094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 359.1064758300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 358.9319152832031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 359.14788818359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 359.19940185546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 359.1039733886719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 358.9496154785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 359.0664367675781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 359.05853271484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 358.8750915527344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 358.8235168457031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 358.9365539550781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 359.0657958984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 358.88470458984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 358.95263671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 358.97869873046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 359.1300964355469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 359.12042236328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 359.0907287597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 358.82550048828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 359.0042724609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 358.8919677734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 358.9418029785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 358.6993103027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 358.8699035644531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 359.3017578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 358.9619140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 358.801513671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 358.80792236328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 359.00390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 359.154296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 358.9767150878906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 358.8747863769531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 358.9091796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 359.01763916015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 358.77313232421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 358.816650390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 358.7076110839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 358.88604736328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.8541564941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 359.0140075683594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 358.9842529296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 359.0032653808594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 359.1514587402344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 358.92950439453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 358.9484558105469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.9422302246094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 358.8258361816406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.9802551269531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 359.1240234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 358.9042663574219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 359.1556091308594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 358.8105773925781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 358.8988952636719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 358.8995056152344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 358.7251892089844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 358.8815612792969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 358.92242431640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 358.8208312988281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 358.8868713378906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 359.0583801269531
############ Running episode number: 784  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 359.0547180175781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 359.1003112792969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 358.8009338378906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 358.8957214355469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 358.9678039550781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 359.0838928222656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 358.90283203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 358.92840576171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 359.1580810546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 358.8941650390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 359.079345703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 358.9458923339844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 358.94427490234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.88494873046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 358.96759033203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 359.0115051269531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 358.86474609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 359.0818786621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 358.97784423828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 358.96612548828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 358.8208312988281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 358.8245849609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 358.9371337890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 359.17877197265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 358.93975830078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 359.0418701171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 358.8894348144531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 358.9720153808594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 358.9477233886719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 358.9687805175781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 359.0906066894531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 358.9530029296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 358.7691955566406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 358.9651184082031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 358.8430480957031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 358.8965148925781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 359.0104064941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.8934631347656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 358.92437744140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 359.20159912109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 359.0618896484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 358.8614196777344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 358.7853088378906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 359.1385803222656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 359.3037109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 358.9587097167969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 359.0316162109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 358.9261779785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 358.89996337890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 359.1244812011719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 358.9012145996094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 359.1907958984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 358.8128356933594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 359.0161437988281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 358.83087158203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 359.00640869140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 358.5977783203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 358.861328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 359.163330078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 358.8866882324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 359.2146301269531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 358.93511962890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 358.6851806640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 359.0438232421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 359.0940246582031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 358.8716735839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 358.9820861816406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 358.9300537109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 359.0525817871094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 358.9902038574219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 359.00042724609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 358.8447265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 359.0433044433594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 358.91717529296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 358.85467529296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 358.95263671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 358.607421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 358.9535217285156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.9671630859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 359.0883483886719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 358.7276306152344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 358.80908203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 358.8265380859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 359.01312255859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 359.0359802246094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 359.0155944824219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 359.0205383300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.88055419921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.89306640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 358.7879638671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 359.07586669921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 359.1134338378906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 358.9425354003906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 358.6962890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 358.6267395019531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 359.0542297363281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 358.8921813964844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 359.0635070800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 358.7237243652344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 359.07952880859375
############ Running episode number: 785  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.8821716308594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 359.0718994140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 358.9757080078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 358.8943786621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 358.9768981933594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 359.1495361328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 359.0830993652344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 359.0352478027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 358.8669738769531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 359.2171936035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 358.7974853515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 358.96575927734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 358.83795166015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 359.0203857421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 359.015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 358.70245361328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 358.7615661621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 358.6721496582031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 358.98883056640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 358.9279479980469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 358.8903503417969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 358.6260681152344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 358.79248046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 358.83770751953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 359.1258544921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 359.0953369140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 359.0575866699219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 358.9541015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 358.91326904296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 359.097412109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 358.8970947265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 358.8083190917969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 358.7708740234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 358.921142578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 359.3270263671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 358.8476257324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 358.7615966796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 359.0081787109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 358.9810791015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 358.91473388671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 358.969482421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 358.93572998046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 358.91864013671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 358.984619140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 358.888671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 358.91314697265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 358.852294921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 359.1406555175781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 359.0736389160156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 358.9508056640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 358.91375732421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 358.908203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 358.88507080078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 358.7713317871094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 358.9508361816406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 358.8419494628906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 359.0874328613281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 358.74932861328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 359.0982971191406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 359.08050537109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 358.8544921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 359.0339660644531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 358.9772644042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 358.8983459472656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 358.95404052734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 359.03326416015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 358.9620361328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 358.8334655761719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 358.8760070800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 358.81231689453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 358.9026184082031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 359.00079345703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 358.88189697265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 358.81121826171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 358.8671569824219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 359.019287109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 359.0094299316406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 358.9118957519531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.923095703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 358.7657470703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 358.79779052734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 359.0133972167969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 359.0233459472656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 358.5240783691406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 358.89190673828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.95269775390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 359.03460693359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 359.0886535644531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.9828796386719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 358.93560791015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 359.0133056640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 358.9479064941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 358.7215881347656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 359.1970520019531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 358.876708984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 358.7963562011719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 358.9154052734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 358.8125915527344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 359.16552734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 358.8617248535156
############ Running episode number: 786  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 359.0076904296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 358.8028259277344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 359.0011901855469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 358.8876647949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 358.9732666015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 358.9023742675781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 358.8069763183594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 358.7385559082031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 359.0405578613281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 358.85595703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 358.9770202636719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 359.1721496582031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 359.02703857421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 359.1484069824219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 359.0538024902344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 358.953369140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 358.74993896484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 359.1600036621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 358.93865966796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 358.89306640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 359.0476379394531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 358.9226379394531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 358.85125732421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 358.8404235839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 358.91351318359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 359.0104064941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 358.8019104003906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 359.08807373046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 358.9577331542969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 358.9263610839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 358.8062744140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 358.8121643066406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 359.0792236328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 358.9852600097656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 359.16534423828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 359.02252197265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 358.8425598144531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.94464111328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 358.7486572265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 358.9632263183594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 358.8669738769531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 358.98675537109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 358.8161315917969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 358.73638916015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 358.921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 358.56298828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 358.6541442871094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 358.89495849609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 358.986328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 358.87164306640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 359.0881652832031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 358.9496154785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 359.00726318359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 358.7565002441406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 358.7851867675781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 359.11199951171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 358.9919128417969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 358.7624206542969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 359.1113586425781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 359.10919189453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 358.7204284667969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 358.93609619140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 358.8326721191406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 358.8296203613281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 358.9516906738281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 359.0221252441406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 358.98895263671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 358.8254699707031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 358.8568115234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 358.8467102050781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 358.86328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 359.2233581542969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 359.1900939941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 358.9554748535156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 359.1533508300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 359.0612487792969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 359.0361633300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 358.8236389160156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.7270202636719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 358.9093322753906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 359.0211181640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 358.743408203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 358.8566589355469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 359.0404968261719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 358.7755126953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.89166259765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 359.0774230957031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.81060791015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.94091796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 358.982421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 359.1684875488281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 358.8396911621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 359.006591796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 359.0292663574219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 358.8446350097656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 358.8590393066406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 358.9629821777344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 358.8338928222656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 359.1456604003906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 359.0160827636719
############ Running episode number: 787  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.8825378417969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 358.8550720214844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 358.9740905761719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 359.08172607421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 358.973876953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 359.03973388671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 359.0379333496094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 359.0499572753906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 358.668701171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 359.0712585449219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 359.1095275878906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 358.8443298339844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 358.9970703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.96856689453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 359.1087341308594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 358.9420471191406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 358.959716796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 358.8837890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 359.0079345703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 359.0115661621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 359.14202880859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 359.0164794921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 359.0632629394531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 359.2374572753906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 359.0605773925781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 359.0144348144531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 359.0473327636719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 358.928955078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 358.8387451171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 358.88995361328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 359.0125732421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 359.0426025390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 358.70599365234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 358.8515930175781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 358.9648132324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 359.072021484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 358.9525146484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 359.12786865234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 358.9695739746094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 358.8778076171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 359.092041015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 359.0297546386719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 358.9364929199219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 358.8205871582031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 359.06597900390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 358.90869140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 358.9350280761719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 359.0882873535156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 359.1118469238281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 358.7960205078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 358.8009338378906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 359.2608337402344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 358.77081298828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 359.0787658691406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 359.2974548339844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 359.1183166503906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 359.13446044921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 358.8670654296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 359.1192626953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 358.93756103515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 358.7709655761719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 358.8789367675781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 358.98455810546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 358.88018798828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 358.9473571777344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 359.0732421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 359.16510009765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 358.8807067871094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 358.83990478515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 359.10015869140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 358.7884216308594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 358.98052978515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 359.05657958984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 358.95416259765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 358.9477844238281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 359.04351806640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 358.9395751953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 359.0583190917969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.803955078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 358.9476623535156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 359.0719299316406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 358.792724609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 359.1520080566406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 359.0877685546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 359.0296630859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.82958984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 358.9851379394531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.99853515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.9947509765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 359.0299377441406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 358.9857482910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 359.0788269042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 358.9338073730469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 359.041015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 358.9271240234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 359.31475830078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 359.1488037109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 358.9432067871094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 358.9290466308594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 358.7635498046875
############ Running episode number: 788  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 359.01019287109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 359.15325927734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 358.97845458984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 358.8438415527344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 358.98992919921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 358.8444519042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 358.7706604003906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 358.7998046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 359.0092468261719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 358.8153076171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 358.92950439453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 358.9474182128906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 359.1251220703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.77276611328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 358.9619140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 358.96044921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 358.8421630859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 358.9523010253906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 358.7229309082031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 358.97314453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 358.70025634765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 358.8866271972656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 359.048095703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 358.8685302734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 358.91387939453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 358.8066711425781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 358.9359436035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 359.0806884765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 358.97119140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 358.9120178222656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 358.93096923828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 359.0259704589844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 358.9706115722656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 358.8671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 359.0846252441406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 359.11431884765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 358.9118957519531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.9851379394531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 358.7677307128906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 359.0623474121094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 358.9988708496094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 359.1618347167969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 358.7330627441406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 358.7154541015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 358.8160095214844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 359.1696472167969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 358.99267578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 359.052490234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 359.188720703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 358.9271545410156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 358.9661560058594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 358.8905334472656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 358.9050598144531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 358.93914794921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 358.98541259765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 358.86297607421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 358.9503479003906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 359.0590515136719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 358.94927978515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 359.086181640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 358.87286376953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 358.84478759765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 359.1205139160156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 358.91357421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 358.8387756347656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 358.7217712402344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 358.95062255859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 359.0477600097656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 358.84844970703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 358.8710021972656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 358.9539489746094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 358.9607849121094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 358.99969482421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 358.9773864746094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 358.96075439453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 358.8420104980469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 358.95587158203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 358.8770751953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 359.1723937988281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 358.6041564941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 358.9450378417969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 358.98114013671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 359.0946960449219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 359.0077819824219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 359.2436828613281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.8543701171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 358.8277282714844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.9801330566406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.943115234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 358.7237548828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 359.0052185058594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 358.7885437011719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 359.112548828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 359.10516357421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 358.9757385253906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 359.1295166015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 358.57208251953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 358.92022705078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 358.84576416015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 358.9741516113281
############ Running episode number: 789  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.8852233886719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 358.8298645019531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 358.924560546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 358.9176940917969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 359.0297546386719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 358.95855712890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 358.80426025390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 359.0325622558594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 358.9259338378906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 358.91510009765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 359.0147399902344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 359.0257263183594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 358.9833984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.9912109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 359.12774658203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 359.1273193359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 359.0328674316406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 359.1555480957031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 359.2185974121094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 358.97198486328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 358.8390197753906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 359.0580749511719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 358.79180908203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 358.906005859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 358.9879150390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 358.8669128417969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 358.8271179199219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 358.9637145996094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 358.9230651855469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 358.8125305175781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 358.8185729980469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 358.8412780761719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 358.9049377441406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 358.96795654296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 358.9221496582031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 358.9153137207031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 358.8915100097656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.92974853515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 358.9438171386719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 358.8483581542969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 358.9792785644531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 359.0031433105469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 358.97906494140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 358.987060546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 359.02294921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 358.651123046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 358.95111083984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 359.0078430175781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 359.24005126953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 358.5794372558594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 358.6313171386719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 358.9698791503906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 358.9234924316406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 358.7574768066406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 358.90032958984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 359.05712890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 358.8765563964844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 359.1722106933594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 359.0472717285156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 358.8019714355469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 358.9162902832031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 359.2995910644531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 359.0508728027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 358.955810546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 358.9931640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 358.8231506347656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 359.00518798828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 359.0351867675781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 359.02276611328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 358.93365478515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 358.91357421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 359.1174621582031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 359.1344299316406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 359.0750732421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 358.8546142578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 358.957763671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 358.8877868652344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 358.90728759765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.8967590332031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 359.0400695800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 358.8201904296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 358.7255859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 358.9848327636719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 358.9190673828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 359.0757751464844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.9100341796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 358.9612731933594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 359.125244140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.7935791015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 359.1515197753906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 358.9153137207031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 359.0526428222656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 358.7458190917969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 358.6250915527344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 358.97393798828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 359.1287841796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 359.0507507324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 358.9083557128906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 359.0008239746094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 358.541259765625
############ Running episode number: 790  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.9371337890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 359.0950012207031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 358.9464111328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 358.8837890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 358.8724670410156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 358.8242492675781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 358.8792724609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 359.1904602050781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 359.0408020019531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 359.1296691894531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 358.7697448730469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 359.1164245605469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 358.9273681640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.8712463378906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 359.1102600097656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 359.1083679199219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 359.07916259765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 358.91180419921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 359.0279541015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 358.8693542480469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 358.8721618652344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 359.0437927246094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 358.92108154296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 358.7555847167969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 358.9537048339844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 359.00341796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 359.05755615234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 358.9870910644531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 358.7444152832031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 358.77520751953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 359.0438232421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 359.0146789550781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 358.97021484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 359.2178039550781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 358.9294738769531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 358.95111083984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 358.844482421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.8404541015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 359.03485107421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 359.0422058105469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 358.9920959472656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 359.05853271484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 358.78289794921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 358.96978759765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 359.2602844238281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 358.98944091796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 358.809326171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 359.0525817871094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 359.01483154296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 358.97711181640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 358.9622802734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 358.8077087402344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 359.0163269042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 359.05438232421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 359.08660888671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 358.95721435546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 359.0163879394531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 358.8204345703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 359.0948791503906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 358.9820251464844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 358.80731201171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 358.84967041015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 359.0181579589844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 358.9727783203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 358.9600524902344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 358.8357238769531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 358.8255310058594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 358.9302062988281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 359.0979919433594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 358.7359619140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 358.98828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 358.8695373535156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 358.892333984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 359.035400390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 358.9860534667969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 358.90753173828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 359.0108947753906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 358.802978515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.6920471191406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 358.933837890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 358.8141174316406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 359.0574951171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 359.0046081542969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 359.1015319824219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 358.662353515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.9997253417969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 359.072998046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 359.09747314453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 359.00384521484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 358.9217224121094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 358.7713928222656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 359.0273742675781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 358.8367004394531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 358.91448974609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 358.9947509765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 359.02239990234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 358.9684143066406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 359.1164855957031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 358.9022521972656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 358.929443359375
############ Running episode number: 791  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.7865295410156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 358.9467468261719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 358.91864013671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 358.9446716308594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 359.10675048828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 359.11083984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 359.0733642578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 359.05755615234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 358.9400939941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 359.0412902832031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 359.274169921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 359.0347900390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 358.8498840332031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.8697204589844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 359.1683654785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 358.7484436035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 358.92034912109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 358.9842834472656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 358.9257507324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 358.93115234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 359.0522155761719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 358.9744567871094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 358.8236999511719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 358.8518981933594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 358.888916015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 359.1124267578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 358.84716796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 359.04876708984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 359.0782470703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 358.8907775878906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 358.8205871582031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 358.7441101074219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 358.88323974609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 358.9198913574219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 358.8395690917969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 359.12286376953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 359.0442199707031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.8682861328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 359.0182189941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 358.84979248046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 359.02471923828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 358.9570617675781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 358.9222106933594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 359.181884765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 358.7403564453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 358.8100891113281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 358.7157287597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 358.8587646484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 358.9793701171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 359.2265319824219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 359.0789489746094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 358.9571838378906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 359.0777282714844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 359.0709533691406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 358.75067138671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 358.94415283203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 358.9339904785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 358.7513427734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 358.7486267089844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 358.9173583984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 359.009521484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 358.88665771484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 359.04583740234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 359.11346435546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 359.020263671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 358.867919921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 359.0153503417969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 358.8946228027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 358.9764709472656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 358.9068908691406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 359.1004333496094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 359.02667236328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 358.84661865234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 359.204833984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 359.059326171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 359.21563720703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 358.76556396484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 359.02728271484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 359.1018371582031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 358.9380187988281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 358.99871826171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 358.76263427734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 358.9569396972656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 359.2236022949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 358.8497619628906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.98931884765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 359.2744140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.9933776855469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.9846496582031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 358.952392578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 359.0357971191406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 359.1278991699219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 358.94439697265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 358.8954772949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 358.8580322265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 358.9761047363281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 358.932373046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 359.0333251953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 358.95208740234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 358.9422302246094
############ Running episode number: 792  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.9095153808594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 359.0235900878906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 359.10040283203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 359.274169921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 358.8708801269531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 359.0549011230469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 359.0306701660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 358.79388427734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 358.7877197265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 358.96356201171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 358.9219665527344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 359.12298583984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 358.8495788574219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.82513427734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 358.7400817871094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 358.90936279296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 358.93255615234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 358.9468994140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 359.1015930175781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 359.0732421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 358.9687194824219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 359.0487365722656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 359.0080261230469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 358.8434753417969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 358.9310607910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 358.9089660644531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 358.858154296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 359.1433410644531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 359.1056213378906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 358.9064636230469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 358.6929016113281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 358.9449768066406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 359.0130310058594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 359.0509948730469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 359.0381164550781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 358.8772888183594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 359.10791015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.8878173828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 358.92156982421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 359.0561828613281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 358.83489990234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 359.0627136230469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 359.0518493652344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 359.036376953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 358.9369201660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 359.047119140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 358.8929443359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 358.82049560546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 359.0021667480469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 359.0358581542969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 358.9578857421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 358.93609619140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 359.011474609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 359.14007568359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 359.0323791503906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 358.9468994140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 359.01141357421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 359.00689697265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 358.6986999511719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 359.0496520996094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 359.0285949707031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 358.96868896484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 358.99713134765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 359.0047607421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 358.9660339355469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 359.0399475097656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 359.0248718261719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 359.0166320800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 359.1768493652344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 358.9254150390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 358.9365234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 358.9319152832031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 358.8636169433594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 359.1313171386719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 358.8536376953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 358.9732971191406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 359.1174011230469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 358.9485168457031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 359.0584411621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 358.9200744628906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 359.0091857910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 358.9678039550781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 358.80279541015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 359.0058898925781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 358.76513671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 359.1617736816406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 358.9602966308594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 359.081298828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.87255859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 358.91845703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 359.18060302734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 359.1466979980469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 358.9725341796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 359.0029296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 359.0095520019531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 358.9884948730469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 358.9727478027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 359.0823059082031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 358.8939208984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 358.93316650390625
############ Running episode number: 793  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 359.2354736328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 359.0405578613281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 358.9473876953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 358.9766845703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 359.1006774902344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 358.8057556152344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 358.7770690917969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 359.0090026855469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 358.994140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 359.1249694824219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 358.8831787109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 358.8135070800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 358.9013977050781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.7738952636719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 358.9482727050781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 359.1018981933594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 359.0022277832031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 358.9173583984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 358.9736022949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 359.02593994140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 359.0274963378906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 358.8812255859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 358.8431701660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 358.8621826171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 358.9566955566406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 359.1014404296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 359.0117492675781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 359.019287109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 359.17547607421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 358.73052978515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 358.9924011230469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 359.0458679199219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 359.0558776855469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 358.86602783203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 359.0966491699219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 358.8687438964844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 359.0426025390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 359.10760498046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 359.08990478515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 358.88995361328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 358.9139099121094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 358.88482666015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 358.8536071777344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 359.0826110839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 358.9540100097656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 359.008544921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 359.1925354003906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 358.5863342285156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 358.821533203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 358.9009704589844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 359.0195617675781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 359.0904541015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 359.06787109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 358.9679260253906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 359.002685546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 358.8673400878906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 358.8804931640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 359.0501403808594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 358.9193115234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 359.06103515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 359.19317626953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 359.103271484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 359.0069274902344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 358.87164306640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 359.192626953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 359.0558166503906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 359.0472412109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 359.0724792480469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 359.1482238769531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 358.880126953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 358.9748840332031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 359.2109680175781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 358.8927917480469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 359.2056884765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 359.1241455078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 359.01519775390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 358.66265869140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 358.98486328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.9772033691406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 358.894775390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 358.7764587402344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 358.9638977050781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 358.9896240234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 359.105712890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 358.90386962890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.9489440917969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 359.0275573730469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.8944091796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.9925842285156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 359.14801025390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 359.1276550292969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 359.0326232910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 358.92559814453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 358.92364501953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 358.9362487792969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 358.98321533203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 358.83929443359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 358.826171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 358.98614501953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 358.99237060546875
############ Running episode number: 794  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.90948486328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 358.8121032714844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 358.92431640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 358.9053955078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 359.02294921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 359.00634765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 358.8330078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 358.9914855957031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 358.9464111328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 358.85284423828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 358.88909912109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 358.9335632324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 358.8692321777344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.9057922363281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 359.06036376953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 359.14215087890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 359.1309814453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 358.82666015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 358.83575439453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 358.89996337890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 359.01068115234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 358.85162353515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 358.71923828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 359.015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 358.83258056640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 359.16973876953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 359.01812744140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 359.0416259765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 358.9629211425781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 359.1060485839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 358.9846496582031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 358.99346923828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 358.9014587402344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 358.921630859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 358.6123046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 358.9476623535156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 358.81561279296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.965087890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 358.97686767578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 359.0401611328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 359.1181945800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 359.1069641113281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 358.7517395019531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 358.732177734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 358.84326171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 358.935546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 359.08306884765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 358.8579406738281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 358.9892272949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 358.71527099609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 359.1222839355469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 358.9249572753906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 358.8072814941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 358.7802734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 358.9100646972656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 358.8974609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 359.048828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 359.0403747558594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 358.969970703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 358.88043212890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 358.97784423828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 358.9534912109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 358.9717712402344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 358.9801940917969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 358.9369812011719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 358.98651123046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 358.756103515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 358.9382629394531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 358.9461364746094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 358.8145751953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 358.9870300292969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 359.0173645019531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 358.9197692871094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 358.8924255371094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 359.1788330078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 358.9997863769531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 359.0171203613281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 358.90679931640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.8779296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 359.2072448730469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 358.927490234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 358.8336486816406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 359.03045654296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 358.9599304199219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 358.9678649902344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.98760986328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 358.92230224609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.91473388671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 359.02154541015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 359.14862060546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 358.9267272949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 358.8785400390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 358.81317138671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 358.93133544921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 359.118896484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 358.8348083496094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 358.87744140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 358.9734802246094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 358.9481201171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 359.1653137207031
############ Running episode number: 795  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.82464599609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 358.92303466796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 359.1798400878906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 358.9140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 358.991943359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 358.8540344238281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 359.0086364746094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 358.9099426269531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 359.00921630859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 359.046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 358.838623046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 359.0235290527344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 358.89105224609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.81072998046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 359.0061340332031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 358.7344665527344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 359.0580749511719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 358.96649169921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 359.0350341796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 358.95989990234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 358.9335632324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 359.0653076171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 359.0572509765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 358.848388671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 358.81024169921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 358.91827392578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 358.7601013183594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 358.8904113769531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 358.93524169921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 359.0607604980469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 358.73626708984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 358.8719787597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 358.9455261230469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 359.3357849121094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 359.108642578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 358.5555114746094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 358.9095458984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.9635314941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 359.0765686035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 359.0477294921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 358.9936218261719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 358.9825744628906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 358.7940673828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 358.80279541015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 359.0123291015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 358.80804443359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 358.8379211425781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 359.200927734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 359.0375671386719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 359.0950927734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 358.98553466796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 358.9911193847656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 358.9449462890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 359.0671691894531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 358.9584655761719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 358.966064453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 359.0151062011719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 359.0252990722656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 358.99285888671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 359.082763671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 359.16143798828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 358.9681091308594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 359.1376647949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 359.0809631347656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 358.96759033203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 359.0608215332031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 358.9884338378906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 359.0534973144531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 358.8206787109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 358.9566955566406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 358.8213195800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 358.8956298828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 358.98828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 358.79095458984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 358.7106628417969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 359.0169372558594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 358.8096923828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 358.97259521484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 359.08782958984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 359.0115966796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 358.7599182128906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 359.0070495605469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 358.88372802734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 358.881103515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 359.0778503417969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 359.1533203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 358.84747314453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.8058166503906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.95947265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 359.0958557128906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 358.93316650390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 358.82806396484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 358.8683776855469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 358.8670959472656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 358.98504638671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 358.86163330078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 358.9486999511719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 358.91290283203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 358.8770751953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 358.7115173339844
############ Running episode number: 796  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.9422912597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 359.2861328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 359.11077880859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 359.0162658691406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 358.9971618652344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 358.9877014160156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 359.0552673339844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 359.0942687988281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 358.9605407714844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 359.16998291015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 358.9801025390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 358.9972839355469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 358.7388610839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 359.0107421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 359.0353698730469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 358.9986877441406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 359.099853515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 359.0174865722656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 359.0594787597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 358.9674987792969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 359.102783203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 359.0579833984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 358.83251953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 358.9154968261719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 359.09130859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 358.9437255859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 359.086181640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 358.83673095703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 359.0267333984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 358.89263916015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 358.83489990234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 359.0004577636719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 358.9148254394531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 358.9718933105469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 358.910888671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 358.90594482421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 358.9812316894531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.8071594238281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 359.04132080078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 358.8332214355469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 358.9029846191406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 358.9909973144531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 358.9087219238281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 358.87744140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 358.9600524902344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 358.9791259765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 359.0547790527344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 359.0348205566406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 358.9289855957031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 358.89178466796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 358.84246826171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 358.7863464355469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 358.6947326660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 358.9107360839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 358.99127197265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 358.7656555175781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 358.76348876953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 358.8542785644531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 358.8072814941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 358.9004211425781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 359.0234680175781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 358.81878662109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 358.7603454589844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 358.7960205078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 358.8829040527344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 358.9886779785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 359.1053466796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 358.855224609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 358.5943908691406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 358.8928527832031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 358.789794921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 358.97686767578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 359.0256652832031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 359.09014892578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 359.06341552734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 358.85687255859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 358.80810546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 358.9154968261719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 359.1257019042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 358.619384765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 358.7053527832031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 358.941650390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 358.84747314453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 358.9595642089844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 358.9966735839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 359.1764221191406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 358.853271484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.9992370605469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.9986572265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 358.8868713378906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 359.0953674316406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 359.0047607421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 358.9294128417969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 358.94921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 358.855224609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 358.89666748046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 358.6921081542969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 358.9110412597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 359.0934753417969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 359.14599609375
############ Running episode number: 797  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.8749694824219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 359.015380859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 358.9292297363281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 358.8774719238281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 359.1468200683594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 359.05377197265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 358.9504699707031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 359.083984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 358.78179931640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 359.03948974609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 359.0423278808594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 359.0089111328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 358.9164123535156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.8527526855469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 358.861328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 358.96087646484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 359.1878662109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 358.791748046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 358.7107238769531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 358.92974853515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 359.01971435546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 358.80572509765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 359.0251770019531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 359.0748291015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 358.8713073730469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 358.92218017578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 359.0223388671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 359.2621765136719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 358.92864990234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 359.0176696777344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 358.9250183105469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 359.0407409667969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 358.7412414550781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 358.7099609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 358.8105773925781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 358.8656921386719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 359.151123046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.90350341796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 359.0167541503906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 359.0357666015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 359.0376281738281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 358.8197021484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 359.10235595703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 359.16259765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 358.8120422363281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 358.9473876953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 359.0208435058594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 358.9051208496094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 359.07672119140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 358.9183654785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 359.00396728515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 358.7544860839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 358.7615966796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 358.82403564453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 358.959228515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 358.8852844238281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 358.93280029296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 358.8128967285156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 358.832275390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 359.026123046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 358.82513427734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 359.0302734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 359.1062316894531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 358.91839599609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 358.9117126464844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 358.94158935546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 359.1385498046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 358.9928283691406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 358.753173828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 359.2591857910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 358.8863525390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 358.93994140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 358.9565124511719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 359.23687744140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 359.00189208984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 359.06048583984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 358.7732849121094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 358.8567810058594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.8712158203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 358.782958984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 359.0147399902344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 358.88592529296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 358.792724609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 358.8406066894531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 358.9233093261719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.83416748046875
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 0 21.0 1204.59090422 (15.892373986997768, 9)
loss 358.9018859863281
Current State,action,reward,Response time,Next State:  (9, 15.892373986997768) 3 20.0 1255.00488935 (15.954793861767499, 10)
loss 358.9739685058594
Current State,action,reward,Response time,Next State:  (10, 15.954793861767499) 3 19.0 1223.96796344 (16.004586266677634, 11)
loss 358.6780700683594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 358.8119812011719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 358.71893310546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 358.8319091796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 359.04229736328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 359.0186462402344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 358.89129638671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 359.9911804199219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 359.003173828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 359.0201416015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 359.0784912109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 359.9934997558594
############ Running episode number: 798  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 359.6248779296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 359.05767822265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 359.0199279785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 359.049072265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 359.6580810546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 359.00689697265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 359.05609130859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 358.99493408203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 358.96527099609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 358.7748718261719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 358.8861389160156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 359.7546691894531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 358.84149169921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.89971923828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 358.8309020996094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 358.942138671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 359.1127624511719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 359.0008850097656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 359.17816162109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 358.87158203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 359.0558776855469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 358.98779296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 359.77349853515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 359.7494812011719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 359.0353088378906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 359.606689453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 358.85968017578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 358.8154296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 358.9676208496094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 359.0642395019531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 360.37774658203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 359.8643493652344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 359.0469665527344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 359.0574645996094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 360.22552490234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 358.8065185546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 358.9462585449219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 359.9336853027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 359.03790283203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 358.8163757324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 358.97576904296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 359.3850402832031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 358.7600402832031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 360.2679748535156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 358.9206848144531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 359.065673828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 359.017333984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 359.1808776855469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 358.8652038574219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 359.0565185546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 358.687744140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 359.51318359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 359.0467834472656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 359.4088439941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 358.8880920410156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 358.9389343261719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 359.17144775390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 359.002197265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 359.08099365234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 358.8509826660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 360.0564880371094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 359.1064758300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 358.7913513183594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 359.3426513671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 359.1587219238281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 358.97283935546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 358.9015197753906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 359.0461120605469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 358.825927734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 359.0269775390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 358.66064453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 358.94183349609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 359.0194091796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 359.17645263671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 359.0905456542969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 359.009033203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 359.10943603515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 358.8660888671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.96697998046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 359.0445251464844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 358.9979248046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 359.0480651855469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 359.0544738769531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 358.8631286621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 359.080322265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.8846130371094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 358.8734436035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 359.03082275390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.8957214355469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 358.860595703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 358.96978759765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 358.89208984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 358.9487609863281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 358.87420654296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 359.10888671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 359.03839111328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 359.42291259765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 358.91119384765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 359.5476379394531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 358.89324951171875
############ Running episode number: 799  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 359.2373962402344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 359.6138916015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 358.7109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 359.1724548339844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 358.88934326171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 359.40411376953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 358.907958984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 359.10968017578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 359.9227294921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 358.9140319824219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 360.1402893066406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 358.90618896484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 359.0563049316406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.92620849609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 359.44805908203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 359.2305908203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 358.9190673828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 358.8898010253906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 358.7694091796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 359.1800231933594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 359.05303955078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 358.7215270996094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 359.4847106933594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 358.8659362792969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 359.1078186035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 358.966552734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 359.410888671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 358.7098083496094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 358.96099853515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 359.409912109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 359.7053527832031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 358.93988037109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 358.8602294921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 358.91766357421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 358.89337158203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 358.8791809082031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 358.89788818359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 359.4120788574219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 359.0489196777344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 359.8906555175781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 359.2062683105469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 359.347900390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 358.91583251953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 358.9771728515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 359.77777099609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 358.8938293457031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 360.0959167480469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 358.9139099121094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 359.2547912597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 358.8645935058594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 359.5904541015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 359.8924865722656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 359.0638732910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 358.7730407714844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 359.1233215332031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 359.08209228515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 359.28936767578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 358.83294677734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 358.79449462890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 358.9939270019531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 359.0393981933594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 359.6883850097656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 359.1794128417969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 359.0258483886719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 359.7381896972656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 359.6402282714844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 359.1518249511719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 358.77899169921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 358.9041442871094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 358.9424133300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 359.3830261230469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 358.8945007324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 359.7798156738281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 358.7742614746094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 358.836669921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 358.838623046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 359.1758728027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 358.9334411621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 359.1654357910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 359.1312561035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 358.89849853515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 359.0576171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 358.9006042480469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 358.9512939453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 358.8609924316406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.9619140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 358.95037841796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 359.0679931640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.9365539550781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 358.9224548339844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 359.8526916503906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 359.0213317871094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 358.8209228515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 358.9018859863281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 358.9195251464844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 359.0890808105469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 358.7989196777344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 359.2022399902344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 359.0000305175781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 358.9710693359375
############ Running episode number: 800  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.9862365722656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 359.111328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 358.9803161621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 360.1805419921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 358.84423828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 358.9306640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 358.9093322753906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 358.94677734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 358.938232421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 358.9799499511719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 359.20123291015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 359.38818359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 358.99957275390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.849365234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 359.023681640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 358.7843322753906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 359.44525146484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 359.67388916015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 358.8909606933594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 358.8916015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 360.00775146484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 358.8354187011719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 359.6927490234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 359.125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 359.2026062011719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 359.2143249511719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 358.9893493652344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 358.8751220703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 359.0629577636719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 358.8294982910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 359.1595153808594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 359.3257141113281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 358.9673767089844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 358.95318603515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 359.16253662109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 359.0470886230469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 358.9829406738281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.6556091308594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 358.8197937011719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 358.9760437011719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 359.0750732421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 358.94573974609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 358.6984558105469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 358.80914306640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 358.88055419921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 359.00042724609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 359.0364074707031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 359.0247802734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 359.0325012207031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 359.0425720214844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 358.892578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 359.58148193359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 358.6889953613281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 358.9966735839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 358.8916320800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 358.99786376953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 359.1932678222656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 359.2058410644531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 359.4091491699219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 358.8338928222656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 359.681640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 358.8367004394531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 358.94635009765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 358.9259948730469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 359.1809387207031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 359.08819580078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 359.0113525390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 358.89208984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 359.26104736328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 359.32379150390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 358.9985046386719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 358.7096252441406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 359.9644775390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 358.9243469238281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 358.9928894042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 358.9502868652344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 358.8258056640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 359.0290832519531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.6969299316406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 358.876708984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 358.7305908203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 359.07147216796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 358.9432373046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 359.796630859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 358.869873046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.623291015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 358.9630126953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 359.8807067871094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.8408508300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 359.1988220214844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 359.8288879394531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 358.82061767578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 359.07989501953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 358.752685546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 359.2024230957031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 359.0305480957031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 358.91595458984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 359.0598449707031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 359.2232971191406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 359.10394287109375
############ Running episode number: 801  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 359.8044128417969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 359.033447265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 359.85455322265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 359.7064514160156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 359.8540954589844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 358.9250793457031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 359.1268615722656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 359.8830261230469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 359.01751708984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 358.94622802734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 359.6246643066406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 358.912109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 359.1974182128906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 359.0101623535156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 359.0594177246094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 358.8482360839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 358.64471435546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 360.21868896484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 358.836669921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 358.8267517089844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 358.9657897949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 358.9779357910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 358.7696838378906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 358.96148681640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 359.1336975097656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 358.9675598144531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 358.9793395996094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 358.7832336425781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 358.8915100097656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 358.9627685546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 359.4983215332031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 358.8497619628906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 358.77728271484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 358.80987548828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 359.2001037597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 359.7793884277344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 359.08416748046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.8532409667969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 358.73321533203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 358.86639404296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 358.92626953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 358.9801940917969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 358.7496032714844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 358.6601867675781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 359.1609191894531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 359.0088195800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 358.7568664550781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 359.0387268066406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 358.7347412109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 359.3755798339844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 359.1250305175781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 358.8481140136719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 358.9816589355469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 358.98834228515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 359.8532409667969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 358.929931640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 359.2535705566406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 359.0582580566406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 358.7817687988281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 359.2645263671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 359.11553955078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 359.27740478515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 358.9082336425781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 359.14801025390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 358.70733642578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 358.994873046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 358.6951904296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 359.09478759765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 358.8960266113281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 358.92340087890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 360.4228820800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 358.89752197265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 358.9866027832031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 358.8828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 359.27093505859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 359.050537109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 359.0352478027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 358.9337463378906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 359.04364013671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 358.9051208496094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 358.8348388671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 359.093505859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 359.01971435546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 360.0279235839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 359.07269287109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 359.2767639160156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 358.98944091796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.9499816894531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 359.14263916015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 359.25445556640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 358.8860168457031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 358.90850830078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 358.985595703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 359.1405334472656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 358.84423828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 359.0274353027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 358.95135498046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 359.0248718261719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 359.01226806640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 358.9315185546875
############ Running episode number: 802  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 359.3843078613281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 358.9526062011719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 359.08953857421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 358.9464111328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 358.9686279296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 359.0698547363281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 358.9358215332031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 359.84326171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 358.8730163574219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 359.17822265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 359.0242004394531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 359.03985595703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 358.921142578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.79132080078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 359.9695129394531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 358.7930908203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 358.8951416015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 358.9725341796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 358.9029541015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 359.130615234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 359.0588073730469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 358.9705810546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 359.0193786621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 359.1885070800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 360.27630615234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 358.90130615234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 359.39044189453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 359.8561096191406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 358.81500244140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 358.8053283691406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 359.0821533203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 358.9463806152344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 358.9174499511719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 359.9405517578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 359.0556640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 358.9949035644531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 359.20989990234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 359.40924072265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 359.06494140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 359.74615478515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 360.0238342285156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 359.2012634277344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 359.87060546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 358.98162841796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 358.8344421386719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 359.0408935546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 359.1047668457031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 359.84112548828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 358.9498596191406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 358.9153747558594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 358.85137939453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 358.5862121582031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 358.8405456542969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 359.0804443359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 359.0527648925781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 359.2606201171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 358.97076416015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 359.49554443359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 358.8059997558594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 359.00762939453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 358.7543640136719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 359.0007019042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 359.2376708984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 359.3553161621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 359.9970703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 359.05767822265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 359.1849670410156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 358.8136291503906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 358.9364929199219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 359.088623046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 358.8916015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 359.744384765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 359.0451354980469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 359.04693603515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 358.9793701171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 358.81524658203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 359.0089111328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 358.9991149902344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.9568786621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 359.2051696777344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 359.0195007324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 358.98199462890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 359.0549621582031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 359.0550842285156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 359.8168029785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.92889404296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 358.9439697265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.7660217285156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 359.3851013183594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 359.21807861328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 358.7402648925781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 358.87689208984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 358.8623962402344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 358.8741760253906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 358.95111083984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 359.1075439453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 358.8570861816406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 358.97772216796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 359.5997314453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 359.1741027832031
############ Running episode number: 803  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 359.1252136230469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 359.6898193359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 358.9385681152344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 359.08251953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 358.81488037109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 358.7138977050781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 359.0151062011719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 358.84503173828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 359.0179748535156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 359.34088134765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 358.8800354003906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 359.02655029296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 359.0873107910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.8562316894531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 358.9064636230469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 359.22698974609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 358.97418212890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 359.0361328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 358.992919921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 359.198486328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 359.2584533691406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 359.819580078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 358.9842224121094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 358.9493408203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 359.1593017578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 359.0057678222656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 358.81341552734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 358.7700500488281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 358.87322998046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 359.0676574707031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 358.9752197265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 358.79022216796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 359.1385192871094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 359.0126647949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 358.9197998046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 358.90985107421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 358.9697265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.8831787109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 359.4469299316406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 359.0206604003906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 358.8865051269531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 359.0372009277344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 358.95391845703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 358.85943603515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 359.87091064453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 359.04595947265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 360.23712158203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 359.01171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 358.73809814453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 358.8117980957031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 359.9383850097656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 359.0877685546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 359.0901184082031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 358.85198974609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 358.9449157714844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 359.03363037109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 358.86090087890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 359.0791320800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 359.4284362792969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 359.8583679199219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 359.1829833984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 359.15899658203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 358.99749755859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 358.90625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 359.1855163574219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 358.7865905761719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 358.86907958984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 358.86492919921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 359.118896484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 359.10772705078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 358.814697265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 358.8614196777344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 359.0553283691406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 358.9511413574219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 358.57708740234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 358.8655090332031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 358.97845458984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 359.2350769042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.85833740234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 358.967041015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 358.8134460449219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 358.9688415527344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 359.0707702636719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 359.2530822753906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 358.95391845703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.91729736328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 358.8075866699219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.8168029785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.8757019042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 358.9823303222656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 358.98876953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 359.0003356933594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 359.0908508300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 358.7230224609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 359.00152587890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 358.8905029296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 359.7129821777344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 358.989990234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 359.1909484863281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 358.91497802734375
############ Running episode number: 804  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.9672546386719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 359.093017578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 358.73077392578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 358.959228515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 358.8616943359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 359.0645751953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 358.97711181640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 358.96759033203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 359.6820068359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 359.1535339355469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 358.93438720703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 359.0285949707031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 359.868896484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.8019104003906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 358.83819580078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 359.17138671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 359.141845703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 359.08184814453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 358.7285461425781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 358.9903564453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 360.1750793457031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 358.9664611816406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 359.29107666015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 358.9172668457031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 359.1653747558594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 359.1286926269531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 358.94647216796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 359.8187255859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 359.1873779296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 358.72381591796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 358.85223388671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 359.2025451660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 358.8948059082031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 358.8878479003906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 359.6008605957031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 358.8376770019531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 358.86553955078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 359.01446533203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 359.976806640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 358.9547119140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 358.9724426269531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 358.8066101074219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 358.9410705566406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 359.06982421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 358.8528137207031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 359.04083251953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 359.0481262207031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 358.9544372558594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 358.9172058105469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 358.866455078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 358.77642822265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 358.87701416015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 358.8772888183594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 359.06304931640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 359.11322021484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 359.040771484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 358.77410888671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 358.9991455078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 358.9736633300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 358.9673156738281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 358.81842041015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 359.0776062011719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 358.8480529785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 358.9433898925781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 359.0269470214844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 358.96881103515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 359.2681884765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 358.93670654296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 358.9175109863281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 358.8806457519531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 359.0086975097656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 358.913330078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 358.946044921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 360.0895690917969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 359.12841796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 359.1367492675781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 358.8129577636719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 358.95635986328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.875244140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 358.76397705078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 358.96148681640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 359.04278564453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 358.95306396484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 358.78204345703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 358.89501953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.7942810058594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 358.9091491699219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.8806457519531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.8457946777344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 359.1654968261719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 358.84051513671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 359.77886962890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 359.14886474609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 359.0919494628906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 358.8294677734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 359.1242370605469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 358.8643798828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 359.0073547363281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 359.2528076171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 358.9700012207031
############ Running episode number: 805  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.9193115234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 359.01446533203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 359.0087585449219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 358.7445983886719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 358.9229431152344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 358.9566955566406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 359.87255859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 359.16455078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 359.3138122558594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 358.7861328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 358.94476318359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 359.0149841308594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 358.93438720703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.9081115722656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 358.983642578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 359.08087158203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 358.9715270996094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 358.847900390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 358.9122009277344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 359.03887939453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 359.1894836425781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 359.12786865234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 359.8506774902344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 359.013916015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 358.86505126953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 359.78466796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 358.7544250488281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 359.07275390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 359.8101501464844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 359.0180969238281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 359.1605529785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 359.0000305175781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 358.9319763183594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 358.9082336425781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 358.9104919433594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 359.04974365234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 358.8841247558594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 359.2214050292969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 358.7740478515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 358.9551696777344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 358.8787536621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 358.9581604003906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 359.1429138183594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 358.9679260253906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 358.7388000488281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 359.0958251953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 359.7643127441406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 358.9698791503906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 360.1531066894531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 359.8744812011719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 358.7702941894531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 359.00421142578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 358.9836730957031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 358.92608642578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 358.92236328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 359.8442077636719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 358.8032531738281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 358.86492919921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 358.8852844238281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 358.9096374511719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 358.9228515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 359.0688171386719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 358.7801513671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 358.9491882324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 359.2782287597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 359.05584716796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 358.7769775390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 359.0440673828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 359.08135986328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 359.05120849609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 359.811767578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 358.9162292480469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 360.0153503417969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 359.06494140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 359.118408203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 359.8718566894531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 358.91778564453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 359.460205078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.90631103515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 358.92242431640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 359.6617431640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 359.0244445800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 358.88018798828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 358.88800048828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 359.0755310058594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 359.170166015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 359.0738830566406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.9989013671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.9113464355469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 358.8805847167969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 358.81671142578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 358.99969482421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 358.9708557128906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 358.94012451171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 359.06280517578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 359.0096435546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 359.0527038574219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 359.6734313964844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 358.9233093261719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 358.98626708984375
############ Running episode number: 806  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.871826171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 359.8780212402344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 358.99603271484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 359.0080261230469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 359.0656433105469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 358.8719787597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 358.9956359863281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 359.8540954589844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 359.4736328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 358.9095764160156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 360.01763916015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 358.9857482910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 359.80279541015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 359.07476806640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 359.1406555175781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 358.8148193359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 359.0833435058594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 358.8700866699219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 358.9336853027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 359.1326599121094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 358.9689636230469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 359.390869140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 358.8296813964844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 359.3854064941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 358.8498840332031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 359.1533508300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 358.82318115234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 359.1418762207031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 359.2921142578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 359.0358581542969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 358.7537841796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 359.1223449707031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 359.2296142578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 358.988037109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 359.2096862792969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 359.6600646972656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 358.7431335449219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.97552490234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 359.7394104003906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 358.955322265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 359.1534423828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 358.8916931152344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 359.0268249511719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 358.93865966796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 359.35113525390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 358.94866943359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 358.7557678222656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 359.0414123535156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 359.0408935546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 359.0263977050781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 358.8783264160156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 359.0681457519531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 358.9614562988281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 358.9732360839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 358.9666748046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 358.7294616699219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 358.92095947265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 359.08294677734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 358.74658203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 358.86724853515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 358.91314697265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 358.94378662109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 359.1122131347656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 358.9012451171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 359.16864013671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 359.0119323730469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 359.7121276855469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 358.934326171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 358.9114990234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 358.66925048828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 359.0108337402344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 358.9400329589844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 358.79644775390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 358.8768310546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 358.9714050292969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 359.046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 358.85760498046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 358.9779968261719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 359.1255798339844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 358.9445495605469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 359.1299133300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 359.07720947265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 359.5073547363281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 358.98291015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 358.8901672363281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 359.1752624511719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 358.6792907714844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 360.0520324707031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 359.0345764160156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 359.07440185546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 358.830078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 358.8839111328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 358.92462158203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 359.1839904785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 359.07073974609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 358.9027404785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 358.8054504394531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 359.04541015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 359.0443420410156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 359.2426452636719
############ Running episode number: 807  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 359.8799743652344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 359.208251953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 359.2251281738281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 358.7920837402344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 359.8331298828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 359.8837890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 358.8134460449219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 358.7397766113281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 359.1087646484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 359.7115783691406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 358.742431640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 359.27740478515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 358.91363525390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.8132019042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 358.7357177734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 358.98114013671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 359.0815124511719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 359.00128173828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 359.005126953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 359.22747802734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 358.99591064453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 359.6105041503906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 359.5114440917969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 359.69158935546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 358.96044921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 358.81915283203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 359.30413818359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 359.169677734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 358.8066711425781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 359.1337585449219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 358.8963317871094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 360.3631591796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 359.0555419921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 359.11077880859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 358.7879943847656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 358.8208923339844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 359.07379150390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.8191223144531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 358.732421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 359.09075927734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 360.23419189453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 359.7388610839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 358.95343017578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 358.83697509765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 358.7856140136719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 358.7016906738281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 358.98089599609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 359.3758239746094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 359.0576171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 359.8232421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 359.07269287109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 358.8984069824219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 359.20269775390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 358.9012756347656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 359.10736083984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 358.8861389160156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 359.5589294433594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 359.0203857421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 359.127197265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 358.6859130859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 359.16485595703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 358.83526611328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 358.9539794921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 359.901611328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 360.0789794921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 358.98626708984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 358.84405517578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 358.89398193359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 359.0560302734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 359.3690185546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 359.1485290527344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 358.8251037597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 359.1003112792969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 359.0907897949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 358.9830322265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 358.8852844238281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 358.850341796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 359.0448913574219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 359.907470703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 359.70013427734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 359.5661926269531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 359.1732177734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 358.8219299316406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 358.94329833984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 358.8782653808594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 360.033935546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 358.85260009765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.9302062988281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.8645324707031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 358.7132568359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 358.7773742675781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 359.0213317871094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 359.0595703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 358.8451843261719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 358.91510009765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 358.699951171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 358.6840515136719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 358.8712158203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 358.805419921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 359.1378173828125
############ Running episode number: 808  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.92340087890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 358.8145751953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 358.9238586425781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 358.9630432128906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 358.74310302734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 358.84564208984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 358.7894287109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 358.9029846191406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 358.9411926269531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 358.874755859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 359.10076904296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 359.0259094238281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 359.1999206542969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.7300720214844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 359.0511169433594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 358.8487243652344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 358.90142822265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 359.0376892089844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 358.94677734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 358.88134765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 359.03515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 358.8533935546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 358.7198791503906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 359.01617431640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 359.0537414550781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 359.16436767578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 358.95892333984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 358.787109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 358.8683776855469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 358.83349609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 358.9619445800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 359.0339660644531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 358.8240966796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 359.09918212890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 358.98052978515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 359.0631103515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 358.9555358886719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.7253112792969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 358.9705505371094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 359.0852966308594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 358.8358154296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 358.8970947265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 359.1221008300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 358.77215576171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 359.07550048828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 358.96160888671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 359.0849914550781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 359.0139465332031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 359.0653381347656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 359.0852355957031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 359.079833984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 358.6940002441406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 359.03106689453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 359.04705810546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 358.88873291015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 359.0632629394531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 358.8789367675781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 358.74468994140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 358.9620666503906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 358.8426818847656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 358.9163818359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 358.98980712890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 359.1781005859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 359.0045166015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 359.02777099609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 359.0201721191406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 358.9865417480469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 358.9997253417969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 358.605712890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 359.07489013671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 359.0807800292969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 359.10919189453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 358.6966552734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 359.0590515136719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 358.7993469238281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 359.04296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 359.02923583984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 359.10223388671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 359.0173645019531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 358.9121398925781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 358.9224548339844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 358.8541564941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 359.0262145996094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 359.03167724609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 359.05072021484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.9018859863281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 358.87725830078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 359.00537109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.9771728515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 358.7426452636719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 358.8311462402344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 359.06573486328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 358.8916015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 358.9574890136719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 358.9953308105469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 359.0713195800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 358.6670227050781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 358.9990234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 358.9337463378906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 359.0101623535156
############ Running episode number: 809  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 359.0194396972656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 359.0813903808594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 359.0680236816406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 358.7685241699219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 358.77545166015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 359.0499267578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 358.9048767089844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 359.0361633300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 358.9172058105469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 359.18359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 359.10174560546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 359.0328674316406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 359.0098571777344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.98651123046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 358.7262878417969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 359.2623291015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 358.8473815917969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 358.97259521484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 359.11993408203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 358.81378173828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 359.1473083496094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 358.9007873535156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 358.6263122558594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 358.9662780761719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 359.0931701660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 359.0065612792969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 358.97271728515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 358.9053039550781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 358.9580383300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 358.8612060546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 358.9673156738281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 359.076904296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 358.8629150390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 359.2015686035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 358.84698486328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 359.00469970703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 359.0482177734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.9204406738281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 359.0545654296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 359.0718078613281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 358.9255065917969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 359.15411376953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 358.8919982910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 359.1102600097656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 359.1947326660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 358.8251647949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 359.2555847167969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 359.12066650390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 359.0053405761719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 358.6595458984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 359.02642822265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 358.90008544921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 358.74285888671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 358.91351318359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 359.1100769042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 358.6849670410156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 359.0813903808594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 358.98553466796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 358.8377380371094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 358.8731994628906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 359.095458984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 358.9950866699219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 358.8975830078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 359.1469421386719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 359.1488342285156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 358.8922424316406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 358.9894714355469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 358.89874267578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 358.92596435546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 359.06280517578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 358.9017639160156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 359.00787353515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 358.98870849609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 359.0643615722656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 358.92950439453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 358.8073425292969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 358.89337158203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 358.9697265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 359.1679992675781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 358.8933410644531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 359.0416564941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 359.0973205566406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 358.89117431640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 358.9190979003906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 359.1009826660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.81866455078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 358.9082946777344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.7997741699219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 359.2010192871094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 359.2583312988281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 358.94952392578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 358.8876037597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 359.18310546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 359.0373840332031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 358.8428649902344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 358.798828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 358.8135070800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 358.8771667480469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 359.2486267089844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 358.7667541503906
############ Running episode number: 810  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.97540283203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 358.9024353027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 359.0810852050781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 359.0377502441406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 359.15032958984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 359.02630615234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 358.74859619140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 358.8734436035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 359.0297546386719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 358.81536865234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 358.85687255859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 358.9778747558594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 359.0579528808594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.9908447265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 358.9327087402344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 358.97540283203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 358.7830505371094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 359.2485656738281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 359.09222412109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 359.06103515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 359.00506591796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 358.954345703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 359.03131103515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 359.0993957519531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 359.0565490722656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 358.8666076660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 358.9764709472656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 358.841552734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 359.02197265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 359.1134338378906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 358.8149719238281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 359.1231384277344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 359.02099609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 359.0225524902344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 359.00653076171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 358.8042907714844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 358.8208923339844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 359.0315856933594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 359.064697265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 358.9752502441406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 358.9189453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 358.7552795410156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 359.1341247558594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 358.88104248046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 359.0443115234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 359.01202392578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 359.03692626953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 358.94561767578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 359.24127197265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 358.98419189453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 359.2200622558594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 358.9600830078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 359.0263366699219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 358.813232421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 358.91650390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 358.87255859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 359.05279541015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 358.8331604003906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 358.9381103515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 358.9742431640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 359.1186218261719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 359.0684814453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 358.9915771484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 359.0697326660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 358.9435119628906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 358.93756103515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 358.7537841796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 359.0384216308594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 358.6260070800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 358.9989013671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 359.10162353515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 359.0113220214844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 358.8192443847656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 359.0955505371094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 358.8982849121094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 358.93017578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 358.8785705566406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 359.1212158203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.82171630859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 358.9327392578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 358.9652099609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 358.9710693359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 359.060791015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 358.9059143066406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 359.0298156738281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.8055114746094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 359.06744384765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.8085632324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.9643249511719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 359.11334228515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 359.0462951660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 358.7574157714844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 358.9571228027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 358.96759033203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 358.9666442871094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 358.7067565917969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 358.8458251953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 359.1842956542969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 358.97015380859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 358.9786682128906
############ Running episode number: 811  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.92236328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 358.8907165527344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 358.92694091796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 359.0515441894531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 359.1427307128906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 358.8091125488281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 358.9151306152344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 358.8926086425781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 359.108154296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 359.0920104980469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 358.89764404296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 358.8177490234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 358.92919921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.9599914550781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 358.9206848144531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 359.0614318847656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 358.9787902832031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 359.07183837890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 358.8009338378906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 359.05206298828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 358.9727478027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 359.0187683105469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 358.919189453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 358.6392822265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 358.8316650390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 358.8529052734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 358.87408447265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 358.9201354980469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 358.8375244140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 358.9076843261719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 358.7496643066406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 358.9078063964844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 358.87493896484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 359.2059326171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 359.127685546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 358.95916748046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 358.8755798339844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 359.1917419433594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 358.8900451660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 358.9903564453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 359.0166320800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 359.1051025390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 358.7206115722656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 358.9425048828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 358.86163330078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 358.9614562988281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 358.9319763183594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 359.1334533691406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 358.90631103515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 358.9205017089844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 358.9624328613281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 359.10980224609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 358.8621826171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 358.7102966308594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 359.30487060546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 358.6915283203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 358.8940734863281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 359.1748352050781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 358.9902648925781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 359.11541748046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 358.8589782714844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 358.9873962402344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 358.92938232421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 358.94940185546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 358.9805908203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 358.8350524902344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 359.0062255859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 358.9188537597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 358.7777404785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 358.8000793457031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 358.9347839355469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 358.89154052734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 358.9226989746094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 359.126708984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 358.78125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 359.1640319824219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 359.0601806640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 358.9859619140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.78485107421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 358.8907470703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 358.8053283691406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 358.95367431640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 359.19708251953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 358.8214416503906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 359.0528259277344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 359.01080322265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 359.1143798828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.887939453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.9274597167969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 359.0206604003906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 358.9231872558594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 359.1001281738281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 358.9115295410156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 359.05548095703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 358.7813720703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 358.9689636230469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 358.8945617675781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 359.038818359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 358.907958984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 359.3545837402344
############ Running episode number: 812  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 359.1474609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 358.9392395019531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 358.8294372558594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 358.8827209472656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 358.87921142578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 358.9373474121094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 358.75732421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 358.8341979980469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 359.0894470214844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 358.7213134765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 358.8277282714844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 359.0970153808594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 359.1654052734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.85992431640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 358.9012145996094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 359.0146789550781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 358.8921813964844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 358.9466552734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 358.9627990722656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 359.0746154785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 358.8572998046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 358.9366149902344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 358.9330139160156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 359.1377258300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 358.97271728515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 359.10809326171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 358.73614501953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 358.9591064453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 358.887939453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 358.8996276855469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 358.8676452636719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 358.988525390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 358.9957275390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 358.84808349609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 358.96893310546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 358.9023132324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 358.99053955078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.9015197753906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 358.8982849121094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 358.87579345703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 359.1278076171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 359.1058349609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 359.04302978515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 358.833984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 359.0184326171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 359.0045166015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 358.8414001464844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 359.09417724609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 358.7626037597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 359.0093994140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 358.9048767089844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 359.0599060058594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 358.96820068359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 359.2373962402344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 358.9827880859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 359.1333312988281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 358.9055480957031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 358.9709167480469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 358.8193359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 359.10821533203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 359.0008850097656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 358.8697509765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 358.9288024902344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 359.0040283203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 359.13323974609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 358.9583740234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 359.018798828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 358.99749755859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 358.95941162109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 358.79351806640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 359.0805358886719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 358.8978576660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 359.0118103027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 358.8011169433594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 358.7934265136719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 358.8822021484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 358.94830322265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 358.84515380859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.9501647949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 358.84423828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 358.84442138671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 358.7618408203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 359.0698547363281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 358.8536376953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 359.2291564941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 359.0498962402344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 358.85675048828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.8395080566406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.8036193847656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 358.98504638671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 358.661865234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 358.78546142578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 359.0188293457031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 358.8716125488281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 358.9839172363281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 359.1965637207031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 358.98431396484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 359.10870361328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 359.0048522949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 359.0210266113281
############ Running episode number: 813  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 359.1154479980469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 358.8337707519531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 358.9011535644531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 358.9374084472656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 358.992919921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 359.006103515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 358.94598388671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 358.908447265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 358.9117126464844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 358.969970703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 358.8032531738281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 359.0271911621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 359.22601318359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 359.16925048828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 359.0960693359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 359.03350830078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 358.7420959472656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 358.9570007324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 358.8190612792969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 359.1628112792969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 358.9012451171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 359.0232849121094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 358.9989318847656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 359.0943603515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 359.1432800292969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 359.119384765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 359.1728820800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 358.9257507324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 358.5843505859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 359.047607421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 358.9494934082031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 358.8528137207031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 358.7793884277344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 358.92254638671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 359.15234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 358.93829345703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 358.8725280761719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 359.11920166015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 359.0198059082031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 359.03289794921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 359.08331298828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 358.9542236328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 359.0306396484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 359.05419921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 358.9427490234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 358.8570251464844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 358.83062744140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 358.85595703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 358.8246154785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 359.023193359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 358.9468078613281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 358.8126220703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 359.0965576171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 359.068603515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 359.1289978027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 358.8603820800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 359.1065368652344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 358.8234558105469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 358.82568359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 358.78692626953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 359.00177001953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 358.64251708984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 359.071533203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 359.1893310546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 358.9661560058594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 358.8943176269531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 358.9540710449219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 358.8504943847656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 359.081787109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 359.01495361328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 359.03070068359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 358.87469482421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 359.0980529785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 358.7557678222656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 359.07855224609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 359.1314697265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 358.96221923828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 359.04180908203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.9512634277344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 358.8437194824219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 358.8796081542969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 358.888671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 359.1856994628906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 359.094482421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 358.88531494140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 359.1326599121094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 358.66790771484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.9178161621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.948486328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 359.01641845703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 359.00726318359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 359.2200927734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 359.0490417480469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 358.9541015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 359.0557861328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 359.23046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 359.01617431640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 358.9346923828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 359.0548400878906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 358.7821044921875
############ Running episode number: 814  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.8152770996094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 358.8941345214844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 359.07257080078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 358.78741455078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 359.0409240722656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 358.8960266113281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 358.96148681640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 359.0175476074219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 358.747802734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 358.8091125488281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 359.1057434082031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 359.00921630859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 358.9627685546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.85699462890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 359.15460205078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 358.9793701171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 358.8989562988281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 359.0748596191406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 358.85101318359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 359.00689697265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 359.0210876464844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 359.0487060546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 358.8326721191406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 359.1067810058594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 358.9768371582031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 358.7729797363281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 359.0368957519531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 359.07220458984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 358.77642822265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 359.0003356933594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 359.001220703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 358.8885192871094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 359.11724853515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 358.9622497558594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 359.0142822265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 359.1538391113281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 358.91943359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 359.1369323730469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 358.95318603515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 358.9728698730469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 358.837158203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 358.8359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 359.08056640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 358.8468933105469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 358.96337890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 358.974609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 359.18572998046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 359.0404357910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 359.1094970703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 358.99066162109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 358.832763671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 358.99737548828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 359.065185546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 358.9795227050781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 358.9501037597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 359.033447265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 359.0157470703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 358.9327392578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 358.96624755859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 358.89398193359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 358.94525146484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 359.1815490722656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 358.8926696777344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 358.96856689453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 359.08697509765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 359.08599853515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 358.79833984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 358.9904479980469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 359.0408630371094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 359.0460510253906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 358.9963073730469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 358.67974853515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 358.773681640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 359.0191955566406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 358.889404296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 359.04400634765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 358.8013000488281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 359.0982971191406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.7219543457031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 359.0024719238281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 359.1017150878906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 358.8991394042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 359.0689392089844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 358.79742431640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 358.9030456542969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 359.0202331542969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 359.10711669921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 359.1346130371094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 359.0615539550781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 358.8934020996094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 359.0591125488281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 359.1894836425781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 358.9615783691406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 358.92303466796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 359.01446533203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 358.7961730957031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 358.8553771972656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 358.90972900390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 358.9193420410156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 358.9154357910156
############ Running episode number: 815  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.8086853027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 359.02520751953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 358.8446960449219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 358.86444091796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 359.00885009765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 358.71185302734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 359.02178955078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 358.95684814453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 358.8620910644531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 359.146484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 358.750732421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 359.1988525390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 359.0115661621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 359.0021667480469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 358.8952941894531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 359.053955078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 358.8983459472656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 358.8089904785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 358.93304443359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 359.01312255859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 358.7872009277344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 359.07470703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 358.9887390136719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 359.0792541503906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 359.01824951171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 358.9638366699219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 358.9417419433594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 359.0616455078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 358.9884033203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 359.0234069824219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 359.21466064453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 358.8827209472656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 358.8452453613281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 358.9366760253906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 359.1330871582031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 359.09674072265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 358.9440612792969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.9969482421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 359.0609436035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 359.007568359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 358.6588134765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 359.0311279296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 358.97003173828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 358.8695068359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 359.12042236328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 358.6883239746094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 359.1216125488281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 358.90704345703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 358.98687744140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 358.8799743652344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 358.9018249511719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 359.0082092285156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 359.0910949707031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 358.78082275390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 358.6723937988281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 359.1187744140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 359.0167541503906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 358.9552917480469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 358.9476623535156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 358.85455322265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 358.96881103515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 358.96142578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 359.1241455078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 358.9312438964844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 358.8690490722656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 358.7522888183594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 358.8577575683594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 358.8788757324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 358.9974365234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 358.846923828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 359.0624694824219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 358.9798889160156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 359.1666564941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 359.1421203613281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 358.9654846191406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 358.6950988769531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 358.76953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 359.0556335449219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.99267578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 358.9317626953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 359.08197021484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 358.8376770019531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 358.83697509765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 358.9432678222656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 358.9512023925781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.9397277832031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 359.0412292480469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.976318359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 359.07586669921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 359.0103759765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 359.2121887207031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 358.8648681640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 358.8000183105469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 358.7810974121094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 358.8993835449219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 358.96160888671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 358.9405212402344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 358.7792053222656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 358.71490478515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 358.9964599609375
############ Running episode number: 816  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.9024658203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 359.0838317871094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 358.98345947265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 358.77349853515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 358.8633117675781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 359.0231628417969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 358.9208679199219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 358.8126525878906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 359.0186767578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 359.1148681640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 359.0295104980469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 359.1552429199219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 359.03717041015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.8897399902344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 358.85009765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 358.76953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 358.80328369140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 358.9817199707031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 358.73974609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 358.7703857421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 358.99066162109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 359.072021484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 358.99578857421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 359.0795593261719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 359.0135498046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 358.8865966796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 359.05743408203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 359.0244140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 359.2264404296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 359.0221862792969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 358.986083984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 358.9000244140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 358.89599609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 358.88812255859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 358.9037780761719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 359.18988037109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 358.91326904296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.859619140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 358.8169250488281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 358.8321228027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 358.9752197265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 358.8335876464844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 358.878662109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 359.1114501953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 358.9974670410156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 358.8966064453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 359.1471862792969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 359.0157470703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 359.0765686035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 358.9403991699219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 359.04681396484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 358.8231506347656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 359.0553894042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 358.9798889160156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 358.9543151855469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 359.00372314453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 358.7839050292969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 358.8392639160156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 359.0201110839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 359.01593017578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 358.82769775390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 359.0617980957031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 358.9996337890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 358.9286193847656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 359.0027160644531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 358.8861083984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 359.1623840332031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 358.8718566894531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 359.02197265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 358.7167663574219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 358.9348449707031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 358.67181396484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 358.7525329589844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 358.901123046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 358.7665100097656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 358.8642883300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 358.99725341796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 358.8548889160156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.86151123046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 358.9499206542969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 358.9902038574219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 359.13409423828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 359.02001953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 359.1075134277344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 358.9479675292969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.8286437988281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 359.1053771972656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.86993408203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.8761901855469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 358.9599609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 358.79217529296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 358.9278869628906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 359.0845031738281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 359.0670166015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 358.9291076660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 358.8277282714844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 358.92041015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 358.8956298828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 359.0074768066406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 358.97265625
############ Running episode number: 817  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.9431457519531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 358.7966003417969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 359.15087890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 359.0677490234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 358.9867858886719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 358.73443603515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 358.8500671386719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 359.1170959472656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 358.93304443359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 358.9168395996094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 358.9712219238281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 358.7987365722656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 359.01202392578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 359.0912780761719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 358.96624755859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 359.0024719238281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 358.8802795410156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 359.0105285644531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 358.8835144042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 358.8472595214844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 358.79156494140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 358.7037353515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 358.85552978515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 359.0689392089844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 358.9186706542969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 358.7117004394531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 358.9139099121094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 358.9993591308594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 358.9720153808594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 358.9563293457031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 358.94134521484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 359.005859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 358.8249816894531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 359.02154541015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 359.0436706542969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 359.00341796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 359.0210266113281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.86029052734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 359.2644958496094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 358.8983459472656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 358.7767333984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 358.91632080078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 358.7914123535156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 358.9072265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 359.0697021484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 358.9566955566406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 358.8241271972656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 358.9568176269531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 358.9175720214844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 358.7392578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 358.8215637207031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 358.8375244140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 358.8932800292969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 358.9299011230469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 359.15386962890625
Action +2 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 4 19.0 1143.69153516 (15.353965082180355, 11)
loss 358.89306640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 358.9413146972656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 358.8119812011719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 358.86602783203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 359.18426513671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 359.05804443359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 359.2070617675781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 358.8675842285156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 358.9852294921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 358.94940185546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 358.9057922363281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 359.0018615722656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 358.9549255371094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 358.8940734863281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 359.3644714355469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 358.9029541015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 358.8571472167969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 358.821044921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 359.0133972167969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 359.037841796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 359.1450500488281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 359.08721923828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 359.2002258300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 359.0850524902344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 358.9148864746094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 358.97491455078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 359.120849609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 359.10906982421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 358.93524169921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 359.068115234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.9482116699219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 359.08270263671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.8976745605469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.8027648925781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 358.8536071777344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 358.99603271484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 358.948486328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 358.96990966796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 358.9416198730469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 358.7992248535156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 359.0372619628906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 358.8330078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 358.66741943359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 358.756591796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 358.6964111328125
############ Running episode number: 818  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.95953369140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 359.0442810058594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 358.9125061035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 358.9859924316406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 358.94525146484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 359.10601806640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 358.94110107421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 358.9486999511719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 359.2065734863281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 359.0865478515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 359.0201721191406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 359.09893798828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 359.0263366699219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.77154541015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 358.9089660644531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 358.8846130371094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 359.0488586425781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 358.9041748046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 358.88421630859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 358.9767761230469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 358.81903076171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 359.0633850097656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 358.8440856933594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 359.048095703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 358.73870849609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 358.9691467285156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 358.849853515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 358.89410400390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 359.1490478515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 358.9205627441406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 359.02838134765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 358.9165344238281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 358.7157897949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 358.8864440917969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 359.044189453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 358.9902648925781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 358.6920166015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.96331787109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 358.9030456542969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 358.9927062988281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 358.90008544921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 358.85186767578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 358.87750244140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 359.16241455078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 358.8419189453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 358.8097839355469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 358.80035400390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 358.9901428222656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 358.9925537109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 359.0655212402344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 358.7988586425781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 358.9029541015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 358.82550048828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 359.3020324707031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 359.1475524902344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 358.87744140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 359.0091247558594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 358.9854431152344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 358.8662109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 358.7901611328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 358.9395446777344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 358.8896484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 359.04656982421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 358.8985595703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 358.8587646484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 358.9005126953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 358.7908020019531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 359.0986022949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 358.9561767578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 358.9661560058594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 359.0302734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 359.0048522949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 358.92572021484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 358.97216796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 359.0716247558594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 358.9170837402344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 358.9964904785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 358.91607666015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.8996276855469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 359.1011657714844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 358.7936706542969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 358.8985290527344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 359.1136474609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 359.12530517578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 358.9573059082031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 359.05413818359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 359.039306640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 359.0625915527344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 359.06939697265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 358.8621520996094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 358.7639465332031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 358.9342956542969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 359.1248779296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 358.9803466796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 358.9063720703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 359.07342529296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 358.99786376953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 358.80548095703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 358.8963928222656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 358.9263610839844
############ Running episode number: 819  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 359.1032409667969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 358.9288635253906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 359.07135009765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 359.1266784667969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 358.9643859863281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 359.04510498046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 358.9971923828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 358.96832275390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 358.9321594238281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 359.0252685546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 359.0462951660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 358.9097900390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 358.95989990234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.942626953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 358.8037414550781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 359.13128662109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 359.0664367675781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 358.7654113769531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 358.9494323730469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 358.8917541503906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 359.0462341308594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 358.94384765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 359.0678405761719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 359.0325012207031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 359.2047424316406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 359.0309753417969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 359.05572509765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 359.2559509277344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 359.01837158203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 359.2408447265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 358.7623596191406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 358.97003173828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 359.0254211425781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 358.86212158203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 358.75030517578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 358.9444885253906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 358.9183349609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 359.11669921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 359.0397644042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 358.9694519042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 358.8829650878906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 358.7944030761719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 359.2371826171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 358.9532165527344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 358.8990783691406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 358.9615173339844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 358.8835144042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 359.08685302734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 358.7437744140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 359.0472717285156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 358.8546142578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 358.8085632324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 358.8078308105469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 358.9787292480469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 359.0514831542969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 358.9046936035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 359.0575256347656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 358.72216796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 358.76605224609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 358.8285217285156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 358.9842834472656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 358.6657409667969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 358.99212646484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 359.1329040527344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 358.8901672363281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 359.0642395019531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 358.8080139160156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 358.8197326660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 358.9776611328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 358.8451232910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 359.1047058105469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 359.0062561035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 358.8821716308594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 358.88427734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 359.029052734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 359.135986328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 358.9395751953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 358.99951171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.89227294921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 358.90606689453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 358.9728698730469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 359.0973815917969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 359.109619140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 358.77545166015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 359.0567321777344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 359.0887145996094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 358.9482727050781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.9851379394531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 359.0158386230469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 358.8750305175781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 359.16790771484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 359.1045837402344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 359.05609130859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 358.7624206542969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 359.13409423828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 358.95452880859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 359.0604248046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 358.76995849609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 358.86981201171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 359.0625305175781
############ Running episode number: 820  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.9702453613281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 359.0999450683594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 359.10675048828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 358.88421630859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 358.99542236328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 358.74456787109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 358.9936218261719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 359.1385192871094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 358.7199401855469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 359.1156921386719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 359.00848388671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 358.897705078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 358.8155517578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.88494873046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 359.0408630371094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 358.86199951171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 358.97991943359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 359.1160888671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 358.9010009765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 359.06280517578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 359.0240783691406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 359.0976257324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 359.0157165527344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 358.9472961425781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 358.89007568359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 358.919189453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 358.97564697265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 359.0156555175781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 359.0738220214844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 359.06500244140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 359.0267639160156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 358.8009033203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 358.76763916015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 359.0218200683594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 358.7180480957031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 358.8578796386719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 359.0247802734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.9789123535156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 359.0581970214844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 358.9430847167969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 358.8872375488281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 359.0733642578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 358.9169006347656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 358.83203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 358.99444580078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 358.77923583984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 359.1688537597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 358.6971435546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 358.87139892578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 358.77593994140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 358.9674072265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 359.03900146484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 358.8370361328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 358.993408203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 359.0052490234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 358.9992370605469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 358.7221374511719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 359.1099853515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 359.0865173339844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 358.6864318847656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 358.9546203613281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 359.0569152832031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 358.98748779296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 359.02166748046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 359.0806884765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 359.0482482910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 359.1355285644531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 359.0069274902344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 359.0475769042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 359.0315246582031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 359.02203369140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 358.94488525390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 358.9806213378906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 358.8544921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 358.8219299316406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 358.8504638671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 358.8625793457031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 358.8541564941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.9755554199219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 358.83721923828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 358.8592529296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 358.952880859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 358.90252685546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 358.9971923828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 359.00933837890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 359.092041015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 359.024658203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 359.0292663574219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 359.0346984863281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 359.0742492675781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 358.7449035644531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 359.0124206542969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 358.9312744140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 358.7778015136719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 358.92120361328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 358.8836669921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 358.8678894042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 358.8525390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 358.98944091796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 358.9968566894531
############ Running episode number: 821  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 359.1372375488281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 358.7361145019531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 359.10296630859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 358.9369812011719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 358.9012756347656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 358.7352600097656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 359.0013732910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 359.1155090332031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 359.15478515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 358.95849609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 358.82037353515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 358.9364929199219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 358.86737060546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 359.0760192871094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 359.0838623046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 358.9841003417969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 359.07012939453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 358.9847717285156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 358.9290466308594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 358.85675048828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 358.878173828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 358.79547119140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 358.9095458984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 358.9564208984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 359.015869140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 358.6893005371094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 358.8028564453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 358.9373474121094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 359.2637023925781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 359.0553894042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 358.9836730957031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 358.7693786621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 358.91888427734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 358.8238220214844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 358.8232116699219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 359.0284729003906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 358.9651184082031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.7361145019531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 359.19659423828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 358.8581848144531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 358.7814025878906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 359.1549072265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 358.98785400390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 358.9436950683594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 358.94549560546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 358.9288024902344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 359.0090637207031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 358.8603820800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 358.9920654296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 358.9937438964844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 358.9164733886719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 359.1380920410156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 358.98870849609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 359.24700927734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 358.935791015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 359.0499572753906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 359.07440185546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 358.8384094238281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 358.9279479980469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 358.97442626953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 358.7883605957031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 359.0170593261719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 358.9930419921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 358.9770812988281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 359.04656982421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 358.8467712402344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 358.8710632324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 358.89373779296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 358.78692626953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 358.9319763183594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 358.8076171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 358.782470703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 359.0353088378906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 358.95806884765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 359.0023498535156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 358.82086181640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 358.8257141113281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 359.02301025390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 359.0452880859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 358.81463623046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 359.0383605957031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 359.2346496582031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 358.9252014160156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 358.8318176269531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 358.9432678222656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.8915710449219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 359.05072021484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.8636779785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.831787109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 358.9652099609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 358.884033203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 358.726318359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 359.10357666015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 359.02703857421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 358.9058532714844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 358.9593811035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 358.718505859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 358.8188171386719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 358.82965087890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 358.91943359375
############ Running episode number: 822  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 359.09661865234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 358.9338073730469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 358.888671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 358.94122314453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 358.9225158691406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 358.9268798828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 358.8666687011719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 358.9922180175781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 359.0482482910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 358.8912048339844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 358.9856262207031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 358.9251403808594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 358.8429260253906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 359.0791015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 359.0482177734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 359.07318115234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 359.0677490234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 358.9093017578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 358.744384765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 358.9620056152344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 358.74615478515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 359.0843200683594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 359.159423828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 359.1211242675781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 358.9361267089844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 358.8985900878906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 358.8226318359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 358.8955993652344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 358.97998046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 359.0147705078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 359.0345458984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 359.1671142578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 358.8569641113281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 358.54156494140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 358.94268798828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 358.9522705078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 358.99951171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 359.1062927246094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 358.9494934082031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 358.9440002441406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 359.10833740234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 358.96099853515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 358.87628173828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 358.9907531738281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 358.7998962402344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 358.8554382324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 358.9619445800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 358.8937683105469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 359.0428466796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 358.92626953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 359.0609130859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 358.9725036621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 358.9809265136719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 358.9174499511719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 359.0145568847656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 358.98681640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 358.8512268066406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 358.955322265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 358.824462890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 358.8634033203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 359.0998840332031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 359.02276611328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 359.0487060546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 358.7679138183594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 358.6923522949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 358.9776306152344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 358.90350341796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 358.8916320800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 358.6580505371094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 358.98394775390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 359.1316223144531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 358.8515930175781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 358.89849853515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 358.81170654296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 358.6076965332031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 359.1557312011719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 359.06121826171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 358.9136047363281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.83795166015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 359.008056640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 358.9231262207031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 359.06463623046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 359.01873779296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 359.1072998046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 358.9884033203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 359.0137023925781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 358.9632263183594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.9971008300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.9576416015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 358.7027893066406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 359.0503234863281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 358.9246826171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 358.88287353515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 358.9993591308594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 358.9537353515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 359.1003723144531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 358.7975158691406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 358.9385681152344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 358.97174072265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 358.9380187988281
############ Running episode number: 823  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.9274597167969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 358.89532470703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 359.0622863769531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 358.82525634765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 358.93780517578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 359.0930480957031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 358.745849609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 359.0848693847656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 358.77716064453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 358.9814453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 358.8572082519531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 358.8604431152344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 359.0058288574219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 359.14715576171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 359.15093994140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 358.9398193359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 358.9509582519531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 359.0313415527344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 358.8287658691406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 359.0021667480469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 358.6947326660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 359.06170654296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 359.1943664550781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 358.78814697265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 358.91650390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 358.99334716796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 358.9129943847656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 359.09344482421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 358.97967529296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 358.9880676269531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 358.8393859863281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 358.9985046386719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 358.99249267578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 359.0415344238281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 358.9886779785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 358.96502685546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 358.94488525390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 359.1738586425781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 358.98187255859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 358.8285217285156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 358.9759216308594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 359.014404296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 358.9326477050781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 358.9503479003906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 359.0781555175781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 359.0393981933594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 359.093505859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 359.0030517578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 359.0711364746094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 359.00433349609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 359.08892822265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 359.07196044921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 359.00372314453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 358.781494140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 359.08056640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 358.8858337402344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 358.9306945800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 358.928466796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 358.81707763671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 358.7855529785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 359.0494689941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 358.96160888671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 359.2393798828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 359.0512390136719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 358.9037780761719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 358.8477783203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 359.06451416015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 358.99713134765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 359.0256652832031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 358.94805908203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 359.0650939941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 358.87261962890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 359.09942626953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 358.74761962890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 358.98504638671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 358.8800048828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 359.1205749511719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 359.0843505859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 359.32415771484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 359.01031494140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 358.9192810058594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 358.9000549316406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 358.9337463378906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 359.13372802734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 359.1667785644531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.9721374511719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 358.9695739746094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 359.035400390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 359.020751953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 358.9272766113281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 358.8460388183594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 358.90838623046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 358.9272766113281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 359.0150451660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 359.0433349609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 358.8251953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 358.88525390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 358.9945068359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 358.9830017089844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 359.028076171875
############ Running episode number: 824  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 359.0273742675781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 359.0638732910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 358.9390563964844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 358.9675598144531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 358.70904541015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 358.828857421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 358.8455505371094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 358.7746887207031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 359.05633544921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 358.83685302734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 358.78631591796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 359.0577392578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 358.9925537109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.8406677246094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 358.83624267578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 359.04327392578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 359.15118408203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 358.9271240234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 359.05572509765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 359.0731506347656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 358.9095458984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 358.8647155761719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 359.0174560546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 358.94012451171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 358.9978332519531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 359.1935729980469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 358.81365966796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 359.15765380859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 359.0616455078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 358.977783203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 358.9686584472656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 359.0821228027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 358.8468322753906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 358.9474792480469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 358.7795104980469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 358.96368408203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 359.0627746582031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.9355163574219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 358.7984924316406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 359.0138854980469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 359.023681640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 359.2427062988281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 358.9526062011719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 359.0882568359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 358.9073791503906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 358.9917907714844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 358.894287109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 358.9401550292969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 358.880615234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 358.82537841796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 359.2731628417969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 358.896484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 358.701416015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 359.1335754394531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 358.6358337402344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 358.98333740234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 358.8070068359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 358.8712158203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 359.15765380859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 359.2099609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 358.9618835449219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 358.9858093261719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 359.1395263671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 358.9508056640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 358.9576721191406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 358.71063232421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 358.8296813964844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 359.00958251953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 359.0200500488281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 359.2449951171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 358.772705078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 358.94317626953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 358.79443359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 359.0612487792969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 358.9504089355469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 358.8812255859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 358.8674011230469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 358.94580078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.6753845214844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 358.90655517578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 358.75360107421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 358.5110168457031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 359.0389099121094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 359.05889892578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 359.13641357421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.9692687988281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 359.0506286621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 359.0082702636719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.8592529296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 359.01788330078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 359.000244140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 359.1436767578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 358.8064880371094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 358.8729248046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 359.0581359863281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 359.2352294921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 358.9156188964844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 358.8182067871094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 358.9218444824219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 359.05804443359375
############ Running episode number: 825  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.8570251464844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 359.0956115722656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 358.8678894042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 358.73846435546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 358.910400390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 359.0250549316406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 358.9399108886719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 358.9259338378906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 358.717529296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 358.9745178222656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 358.7234802246094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 358.91070556640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 359.1589660644531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.77197265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 359.19146728515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 358.89776611328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 358.7967224121094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 358.79095458984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 358.9895324707031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 358.7862854003906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 358.7526550292969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 359.0632629394531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 359.1590881347656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 358.80084228515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 358.92926025390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 359.1175231933594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 358.93017578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 358.9968566894531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 358.8460998535156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 359.05810546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 358.9913330078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 359.0539245605469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 359.0021667480469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 359.0851745605469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 358.8883361816406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 358.9194641113281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 359.1355895996094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.87127685546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 359.1147155761719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 358.8566589355469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 358.9742431640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 359.0482177734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 358.9200134277344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 358.8369140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 358.9065856933594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 359.0028991699219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 359.13055419921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 359.0107421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 358.9814147949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 358.864013671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 358.99639892578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 358.865478515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 359.081298828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 359.0941467285156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 358.78045654296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 358.82708740234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 358.90167236328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 358.8386535644531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 358.8009948730469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 359.054931640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 358.92919921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 359.14544677734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 358.99725341796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 358.9236755371094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 359.0477600097656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 358.7431945800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 359.0872497558594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 358.9931945800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 358.9088439941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 359.1993103027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 359.1637268066406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 358.90008544921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 358.85888671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 359.3166809082031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 359.0146484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 358.9089050292969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 359.029052734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 358.913818359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.8638916015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 358.8957214355469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 358.96417236328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 359.1010437011719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 358.8811950683594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 358.82684326171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 359.11962890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.7986145019531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 358.9233703613281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.8423767089844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.8917236328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 358.88848876953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 358.85308837890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 359.04168701171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 358.82879638671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 358.9325866699219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 359.0143737792969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 358.9732360839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 358.737060546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 359.0406494140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 359.02685546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 358.9154968261719
############ Running episode number: 826  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.8548583984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 359.04302978515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 359.0671081542969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 358.8254089355469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 359.1455383300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 359.0333557128906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 358.9393005371094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 359.1121826171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 358.9466857910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 358.9471435546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 358.84014892578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 359.071044921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 358.85443115234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.933349609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 359.17120361328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 359.046142578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 358.9187316894531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 358.8895568847656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 358.8666687011719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 359.0149230957031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 358.78741455078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 359.1258850097656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 358.749267578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 358.86773681640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 358.93206787109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 358.98980712890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 358.99957275390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 358.9485778808594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 359.0182189941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 358.78045654296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 358.7492980957031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 359.0320739746094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 359.003173828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 359.062255859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 359.024169921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 359.1969299316406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 358.9609069824219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.7216491699219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 358.89068603515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 358.9977722167969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 359.1311340332031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 358.82489013671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 358.82965087890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 358.91436767578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 359.0235290527344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 359.07489013671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 359.0222473144531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 358.8785705566406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 359.1832275390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 359.0152587890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 358.83868408203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 358.8837890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 358.8875427246094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 359.0430603027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 359.060791015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 359.0830993652344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 359.2054748535156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 359.00482177734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 358.942138671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 358.8384704589844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 359.0016174316406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 359.0813293457031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 358.89453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 358.849609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 359.4262390136719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 358.8492126464844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 358.8230285644531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 358.90411376953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 359.1517028808594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 359.01171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 358.8734130859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 359.0677795410156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 359.1471862792969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 359.0208435058594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 358.9596862792969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 358.82696533203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 359.04595947265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 359.11181640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.9832763671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 359.0611267089844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 358.8987731933594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 358.6684265136719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 359.1612854003906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 358.9138488769531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 359.1146240234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 359.15167236328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 359.0964660644531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.9838562011719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.9059753417969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 358.93890380859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 358.90081787109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 359.0484619140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 358.96649169921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 358.96807861328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 358.8912658691406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 359.2367248535156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 359.0945129394531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 358.94805908203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 359.0591735839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 358.6592712402344
############ Running episode number: 827  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.9995422363281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 358.97174072265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 359.18646240234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 358.8854064941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 358.8537902832031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 359.0427551269531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 359.01727294921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 358.814208984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 358.81927490234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 359.1220397949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 358.9980773925781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 359.0484619140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 358.8686828613281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 359.1376037597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 359.0595703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 358.9740905761719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 359.0703430175781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 359.0692443847656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 359.0078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 359.0125732421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 358.78717041015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 359.04547119140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 358.8536376953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 359.0574951171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 358.9931945800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 359.0594177246094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 359.12725830078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 358.99627685546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 358.80572509765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 358.9147644042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 359.157470703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 358.9999694824219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 358.9066162109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 359.1263427734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 358.84649658203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 358.9792175292969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 359.0527038574219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 359.05584716796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 359.0928649902344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 358.909912109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 359.06024169921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 358.70166015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 358.93939208984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 358.7593688964844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 358.8842468261719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 358.9988708496094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 359.15594482421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 358.9701232910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 358.8832702636719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 359.17791748046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 359.08734130859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 358.9228210449219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 358.8149108886719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 359.04449462890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 359.0509338378906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 358.9576416015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 358.8006591796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 359.2128601074219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 358.85797119140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 358.9124450683594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 359.08099365234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 358.99371337890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 359.04168701171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 358.825439453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 358.7456970214844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 358.7901916503906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 358.9994201660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 359.1238708496094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 359.05792236328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 358.9600524902344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 359.08099365234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 359.001220703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 359.06072998046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 358.8966369628906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 358.89471435546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 359.09979248046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 359.0614929199219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 358.64080810546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.8885192871094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 358.8716735839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 359.11114501953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 358.5744934082031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 358.9541320800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 358.9823913574219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 358.7773132324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.8929138183594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 358.9457702636719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.85260009765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 359.0773010253906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 359.00018310546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 358.91290283203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 359.1352844238281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 358.9383239746094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 358.8912658691406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 358.9322509765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 359.02001953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 358.9778747558594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 359.1252136230469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 358.8430480957031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 358.9021301269531
############ Running episode number: 828  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.95953369140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 358.84820556640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 358.8805236816406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 358.8772277832031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 359.1313171386719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 359.0704345703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 358.86248779296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 358.860595703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 358.88848876953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 359.1544494628906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 359.1332092285156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 358.89263916015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 358.6645812988281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.82928466796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 359.1297912597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 359.0115661621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 358.9288024902344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 359.01458740234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 358.89617919921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 359.1030578613281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 359.0135498046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 359.0283508300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 359.0417175292969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 359.0987548828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 358.8811950683594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 359.0556335449219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 358.7320861816406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 358.9867858886719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 358.95135498046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 358.8177490234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 359.09771728515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 359.0163269042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 358.8005676269531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 359.28662109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 358.9594421386719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 358.9930419921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 358.7882080078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 359.1312561035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 358.9140930175781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 358.9422607421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 358.9001770019531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 358.7623291015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 359.0814208984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 358.8091125488281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 358.9815979003906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 358.84271240234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 358.9956359863281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 358.9488525390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 359.08843994140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 359.0083312988281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 359.072021484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 358.9619140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 359.07830810546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 358.8798828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 359.0596008300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 358.81195068359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 358.9081726074219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 359.1172180175781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 358.9512939453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 358.97613525390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 358.72796630859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 358.8063659667969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 358.9485168457031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 359.1514892578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 358.9967956542969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 358.9690246582031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 359.0152893066406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 358.8193359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 358.9375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 358.85736083984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 358.951904296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 358.981689453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 359.2456359863281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 359.1637878417969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 358.93994140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 358.97021484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 358.91558837890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 358.91033935546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.93951416015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 359.03271484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 358.92584228515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 358.8990783691406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 358.7906188964844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 358.8456726074219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 359.0721130371094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.9112548828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 358.9385986328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 359.0633544921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.9419860839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 358.8978576660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 359.1693420410156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 358.92431640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 358.85723876953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 359.1929931640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 358.9041748046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 359.2797546386719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 359.0599670410156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 358.8292236328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 359.16302490234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 359.0044860839844
############ Running episode number: 829  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.84112548828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 358.822265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 359.1518859863281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 359.00994873046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 359.0145568847656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 358.81585693359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 359.11016845703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 358.9346923828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 358.85992431640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 358.8043212890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 359.041015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 359.0487365722656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 359.23809814453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.9462585449219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 358.9010925292969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 358.89654541015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 358.9588623046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 359.1600646972656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 358.75244140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 359.0906066894531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 359.0611267089844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 358.9342346191406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 358.8850402832031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 358.58441162109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 358.7301330566406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 358.98931884765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 358.8814697265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 358.72845458984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 358.9365234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 358.9640808105469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 358.9897155761719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 358.9954833984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 358.8441467285156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 359.0281982421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 358.9200134277344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 358.9516906738281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 358.9970703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.8957824707031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 359.06097412109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 359.0859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 359.0361022949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 358.93927001953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 359.2162780761719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 358.83331298828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 358.9412536621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 358.7975769042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 359.1155700683594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 359.032470703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 359.12786865234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 358.7782897949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 359.0278625488281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 358.9430847167969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 359.0480651855469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 359.04461669921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 358.9209289550781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 358.9800109863281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 358.86834716796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 359.09326171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 358.8385009765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 358.9543762207031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 358.9167785644531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 358.8880615234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 359.0056457519531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 359.011474609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 359.0406188964844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 358.962646484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 359.094482421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 359.2398681640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 359.10394287109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 358.87786865234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 358.7143859863281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 358.8143310546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 358.8880310058594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 359.1003723144531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 358.95953369140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 359.05426025390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 359.02044677734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 358.7564697265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.98516845703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 358.8653869628906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 358.5994873046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 358.97686767578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 359.04498291015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 358.9196472167969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 358.79754638671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.8931884765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 358.8296203613281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 359.0294494628906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 359.0606384277344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 358.9447021484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 359.0698547363281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 358.9750671386719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 359.0316162109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 359.2056579589844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 359.06402587890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 359.14801025390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 358.84326171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 359.0231628417969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 359.0757751464844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 358.86749267578125
############ Running episode number: 830  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.95648193359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 358.92755126953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 359.0401916503906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 358.79779052734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 358.9290771484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 358.8939208984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 359.144775390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 359.05224609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 358.9904479980469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 358.87689208984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 359.1578674316406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 358.9754333496094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 358.9691467285156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 359.1202392578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 358.9455261230469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 359.1585388183594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 359.18170166015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 358.89739990234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 358.92864990234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 359.0533142089844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 359.1339416503906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 359.1091003417969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 358.86358642578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 358.9151611328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 359.0027770996094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 359.00885009765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 358.9410400390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 359.00537109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 358.9823303222656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 358.9775085449219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 358.8481140136719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 359.0862731933594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 358.9241027832031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 358.87054443359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 359.12054443359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 358.8710021972656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 358.984130859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.9805908203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 359.0111083984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 358.87506103515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 358.97314453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 358.93701171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 358.9682312011719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 359.1570739746094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 358.87921142578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 358.99462890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 358.98590087890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 359.0231018066406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 358.8856506347656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 359.070556640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 359.1606140136719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 359.0542907714844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 358.88629150390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 358.8824768066406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 359.08990478515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 359.3238220214844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 358.8948059082031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 358.94146728515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 358.7798767089844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 359.0552673339844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 358.78240966796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 359.206787109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 358.862548828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 358.9396057128906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 358.6114196777344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 358.9754333496094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 358.8677978515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 359.1038818359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 359.1302490234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 359.12310791015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 358.9692687988281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 358.9047546386719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 358.9051513671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 358.86767578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 359.0668029785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 358.8991394042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 358.96246337890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 358.9346923828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.88134765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 358.73480224609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 358.9971008300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 359.0798645019531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 359.0249328613281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 358.9742736816406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 358.84967041015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.9886169433594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 359.18438720703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.8750305175781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 359.03729248046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 359.03570556640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 358.9253845214844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 358.95947265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 359.05426025390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 359.2257995605469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 358.8565368652344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 358.8270263671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 359.09368896484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 358.8855895996094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 359.0657653808594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 359.0025634765625
############ Running episode number: 831  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 359.0923767089844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 358.7975769042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 359.0623779296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 359.1522521972656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 359.0732116699219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 358.8048400878906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 359.1383361816406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 359.0128479003906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 358.85504150390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 358.82745361328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 359.0970458984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 359.03729248046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 358.9660339355469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 359.1137390136719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 358.8825378417969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 358.9714050292969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 358.87060546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 358.88311767578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 359.143798828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 358.89031982421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 358.85321044921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 359.08709716796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 358.8703308105469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 358.96868896484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 358.8335266113281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 358.9654846191406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 358.751708984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 358.9723815917969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 358.9927978515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 358.9237365722656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 358.8711242675781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 358.9910888671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 359.1265563964844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 358.818115234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 359.0277099609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 359.05145263671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 358.8160095214844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.9938049316406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 359.2729797363281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 359.0352783203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 358.853759765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 358.85272216796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 359.0114440917969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 358.9772644042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 359.0195007324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 358.9604797363281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 359.08514404296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 358.9583740234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 358.87152099609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 358.78094482421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 358.8286437988281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 358.9857482910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 358.9612121582031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 359.0626220703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 358.7709045410156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 359.0351257324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 358.8281555175781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 358.9691467285156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 359.056640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 358.8277282714844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 358.9479675292969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 359.0168151855469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 358.7978210449219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 359.1478576660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 358.8707580566406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 358.9546813964844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 358.5892333984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 359.1858215332031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 359.24285888671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 359.2660827636719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 358.7622985839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 359.10101318359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 358.61431884765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 359.04302978515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 358.94384765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 359.0138244628906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 358.9754333496094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 359.14794921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 359.09619140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 358.94818115234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 358.8023986816406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 358.82025146484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 359.1140441894531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 358.9543762207031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 358.83319091796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 359.0851135253906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 358.9898986816406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.6374816894531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.8554382324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 359.0763244628906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 359.0632629394531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 358.83367919921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 359.07562255859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 359.1571960449219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 359.0486145019531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 358.87713623046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 358.9203186035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 358.9637451171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 359.01519775390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 359.0577087402344
############ Running episode number: 832  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.92578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 358.95635986328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 358.8116455078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 358.9463806152344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 358.776123046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 359.190673828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 358.8553771972656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 359.0009460449219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 358.8647766113281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 359.1087646484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 358.76434326171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 358.9815979003906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 359.01318359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 359.0052490234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 359.1155700683594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 358.8516540527344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 359.0011291503906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 358.9026794433594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 359.0113830566406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 359.17138671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 358.9391784667969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 358.7618408203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 358.99957275390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 359.0713195800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 358.98199462890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 358.890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 359.1394958496094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 358.8186340332031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 359.01397705078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 358.800537109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 358.9134216308594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 359.07464599609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 358.909912109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 359.01495361328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 359.00762939453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 358.9439697265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 359.1180114746094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.7950439453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 358.73870849609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 359.1560363769531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 358.7874755859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 359.091552734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 358.968994140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 358.8786315917969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 358.874267578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 358.88720703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 358.79791259765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 359.14019775390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 359.0365295410156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 358.7641296386719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 358.7908020019531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 358.9608459472656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 358.9273376464844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 358.908203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 358.89105224609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 358.85040283203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 358.9248046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 359.00189208984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 359.0321350097656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 359.16290283203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 359.0030517578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 358.8965148925781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 358.8685302734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 358.82550048828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 359.0332336425781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 359.0545959472656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 358.78533935546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 358.6331787109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 358.8861999511719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 359.00982666015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 359.08642578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 359.0278625488281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 358.7860107421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 358.8174133300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 358.8846435546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 358.8611145019531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 358.8447265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 359.0133056640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.8510437011719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 359.10345458984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 358.9808349609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 358.87689208984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 359.00128173828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 359.0906677246094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 359.0442810058594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.9241943359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 358.8446044921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 359.111328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 359.00286865234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 359.0444641113281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 358.94940185546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 359.0625305175781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 359.02459716796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 359.082763671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 359.1171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 359.1192626953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 358.98480224609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 358.75909423828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 358.9529113769531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 358.7083740234375
############ Running episode number: 833  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.7522888183594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 358.8778991699219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 358.78472900390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 358.899169921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 358.9970703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 359.05950927734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 359.0331726074219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 359.2255859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 358.8473815917969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 358.909423828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 359.0176696777344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 358.9243469238281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 358.89874267578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 359.103759765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 359.13580322265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 359.0948486328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 358.7320861816406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 358.9320983886719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 359.24407958984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 359.02362060546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 358.92816162109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 358.8290710449219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 358.8382873535156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 358.643310546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 359.03173828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 358.9403381347656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 358.8132019042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 358.87701416015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 359.0729675292969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 358.92529296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 359.1015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 358.9601135253906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 358.997802734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 358.79193115234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 358.983154296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 358.8689270019531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 358.9549255371094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.9930725097656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 359.0915222167969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 359.0302734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 359.0027770996094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 358.69744873046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 358.9732666015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 359.14190673828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 358.840087890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 359.0118408203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 359.120849609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 359.0259094238281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 358.892333984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 358.88873291015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 359.20843505859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 359.0678405761719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 358.816650390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 358.8485107421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 358.8612976074219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 358.83392333984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 359.0185546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 358.9314270019531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 358.86029052734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 359.1998596191406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 358.7208557128906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 358.71771240234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 358.9291687011719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 359.0448303222656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 358.9007263183594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 359.0393981933594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 358.9393310546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 359.11920166015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 358.982177734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 358.9542541503906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 358.9613342285156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 359.1448974609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 358.9674072265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 358.9697570800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 358.9411315917969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 358.9015808105469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 359.1376647949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 358.89996337890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.87066650390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 358.8912658691406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 358.85986328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 358.7767028808594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 358.966796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 358.9581298828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 358.8254089355469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.9146423339844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 359.068359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.9799499511719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.8869934082031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 359.04266357421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 358.85955810546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 359.0671691894531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 358.819580078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 358.71844482421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 358.8748779296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 358.93914794921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 358.9369812011719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 358.977294921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 358.9244384765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 358.9262390136719
############ Running episode number: 834  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 359.1438903808594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 358.95257568359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 358.99169921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 358.9962158203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 358.8410339355469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 358.60540771484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 358.9260559082031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 358.85784912109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 359.05511474609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 359.2608337402344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 358.9073181152344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 358.9076232910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 359.1851501464844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.864501953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 358.9577941894531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 358.928466796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 358.8688659667969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 358.8418884277344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 358.8517761230469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 358.9184875488281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 358.7218322753906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 359.00640869140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 358.900390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 359.08294677734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 359.09381103515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 358.85638427734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 359.0456848144531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 359.0162658691406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 358.86102294921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 358.995849609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 358.78900146484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 358.95489501953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 358.68194580078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 358.9795837402344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 358.85235595703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 359.0406494140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 359.0861511230469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.9295959472656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 358.9543762207031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 358.8789978027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 358.9291076660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 358.91412353515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 359.05999755859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 358.95281982421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 359.0970458984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 359.09283447265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 358.9856872558594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 359.04730224609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 358.9151611328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 359.2818603515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 358.8743896484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 359.1937561035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 358.9154968261719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 358.9703063964844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 359.0431823730469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 358.7424621582031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 359.15277099609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 359.32513427734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 358.9814453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 359.12835693359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 358.97119140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 358.9930114746094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 359.0754699707031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 358.7707214355469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 359.013916015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 358.9206848144531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 358.9174499511719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 358.7706298828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 358.9116516113281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 358.8360290527344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 358.9672546386719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 358.8907470703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 359.1236877441406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 358.84490966796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 359.0758361816406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 358.8802185058594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 358.9178161621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 358.98602294921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.9822998046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 359.0370178222656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 358.97650146484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 358.8453063964844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 358.9599609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 359.0782775878906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 358.87689208984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.8137512207031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 359.09710693359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 359.0248107910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 359.1218566894531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 358.85357666015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 358.84173583984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 358.80657958984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 359.0592956542969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 358.8263854980469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 358.95458984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 358.87384033203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 358.8267822265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 358.9545593261719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 359.00543212890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 359.00042724609375
############ Running episode number: 835  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.8055725097656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 358.9764404296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 359.0812683105469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 358.8650817871094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 358.92047119140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 358.8868408203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 358.7564697265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 359.12603759765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 359.03936767578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 358.8562316894531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 359.0245361328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 358.97393798828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 359.0090637207031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.8681945800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 358.84381103515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 358.869384765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 358.976806640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 358.8422546386719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 358.9691467285156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 359.1246643066406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 358.8076171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 358.7082214355469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 358.9695129394531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 358.9148864746094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 359.0543212890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 359.0289306640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 358.60870361328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 359.0562744140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 359.0916748046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 358.7354736328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 358.8408203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 359.0584411621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 359.055908203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 358.9962463378906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 358.7340393066406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 359.1087646484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 359.08343505859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.9211730957031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 358.9553527832031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 358.82110595703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 358.6470947265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 358.9696350097656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 358.8903503417969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 359.14031982421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 359.06365966796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 358.9385986328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 359.0187683105469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 358.9866027832031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 358.93310546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 358.68280029296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 359.056640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 359.08868408203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 359.0480651855469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 358.9656982421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 358.9145812988281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 358.8788757324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 358.9118957519531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 358.7880554199219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 358.8752746582031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 358.96875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 358.95025634765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 359.0306091308594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 358.9381408691406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 358.8648986816406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 359.1141357421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 358.90570068359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 358.9493713378906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 359.04730224609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 359.0664978027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 359.02532958984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 358.9044494628906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 358.9853820800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 358.8764343261719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 359.0471496582031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 358.9953308105469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 359.10430908203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 359.0640563964844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 359.0596008300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 359.1142578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 358.8114929199219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 359.1421203613281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 358.91949462890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 359.0280456542969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 359.0213623046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 359.0415954589844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.9107971191406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 359.1461181640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 359.09088134765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.8760681152344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 358.80078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 359.06927490234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 359.13092041015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 359.0008850097656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 359.0314636230469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 358.95831298828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 359.0858154296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 359.0507507324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 358.9737243652344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 358.85693359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 358.9743347167969
############ Running episode number: 836  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 359.072509765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 359.0212707519531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 358.8147277832031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 359.075927734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 358.9476623535156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 358.9481506347656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 359.078857421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 359.0194091796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 358.9820251464844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 358.8134460449219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 358.8343811035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 359.0627746582031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 359.24810791015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.8120422363281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 359.0803527832031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 359.2625427246094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 358.89202880859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 358.8868103027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 358.935791015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 359.0243835449219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 358.912353515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 359.0719909667969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 358.7913513183594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 358.6794738769531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 359.0680236816406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 358.93682861328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 358.8658752441406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 359.205322265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 359.0545654296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 358.98284912109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 358.9759521484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 358.9225769042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 358.8633728027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 359.1114807128906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 358.9020690917969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 359.0172424316406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 358.9336853027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.7726745605469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 358.7729187011719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 358.9943542480469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 358.7975769042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 359.218994140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 359.0362243652344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 359.0526123046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 359.1513366699219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 358.9759216308594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 359.0185852050781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 358.81622314453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 358.9215393066406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 358.8380126953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 358.88653564453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 359.2568054199219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 359.10638427734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 358.9470520019531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 358.88134765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 359.028564453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 359.0706787109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 358.942138671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 358.9488830566406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 358.7471618652344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 358.9298400878906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 359.2493591308594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 358.7516784667969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 358.9107666015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 359.037841796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 358.86798095703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 358.9169616699219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 358.7463684082031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 358.7261657714844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 358.9916076660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 358.7911071777344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 358.9912109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 358.9190979003906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 359.1051940917969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 358.8152770996094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 358.9608154296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 359.0234680175781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 358.9039306640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 359.1078796386719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 359.1091613769531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 358.93731689453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 358.8627014160156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 359.0396728515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 359.0729675292969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 358.8953857421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.8075256347656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 358.8355712890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.9304504394531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.7341613769531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 359.023193359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 359.1892395019531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 358.94622802734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 359.28497314453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 358.9111328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 358.8848571777344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 359.0624694824219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 359.0073547363281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 358.9947509765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 358.7890319824219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 358.97064208984375
############ Running episode number: 837  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.8379821777344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 358.88763427734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 358.98779296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 358.9642028808594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 359.0434875488281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 358.9429931640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 359.12646484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 358.86663818359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 359.00250244140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 358.9038391113281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 358.9046325683594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 359.1630554199219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 358.8267517089844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.9440002441406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 358.8714904785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 358.84320068359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 358.865234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 358.8251037597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 359.1544189453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 358.8044128417969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 359.01220703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 358.9468688964844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 359.0028991699219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 358.9638977050781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 358.9107971191406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 358.797607421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 358.7474365234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 358.919921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 359.25927734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 359.22088623046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 359.0462951660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 358.87127685546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 359.0394592285156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 359.0684814453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 358.86126708984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 358.9623107910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 359.0943603515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.9007568359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 359.1176452636719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 358.8814392089844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 359.07440185546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 359.12530517578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 358.90521240234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 359.1019592285156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 358.8710021972656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 358.8804016113281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 358.8734436035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 358.6848449707031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 358.9285583496094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 358.8879089355469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 358.82733154296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 359.0536193847656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 359.14202880859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 358.9835205078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 359.17889404296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 358.8460998535156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 359.0115661621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 358.87896728515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 359.0880126953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 359.03216552734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 358.6744079589844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 359.29278564453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 358.94873046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 358.89019775390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 358.99456787109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 358.9584655761719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 358.9558410644531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 358.9688720703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 358.87493896484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 359.1474914550781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 358.97821044921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 358.99853515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 359.0330810546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 358.945556640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 358.8811340332031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 358.98248291015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 358.989013671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 358.9984130859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 359.0378723144531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 359.10296630859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 358.88824462890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 358.9559631347656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 359.0018310546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 358.96441650390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 358.834716796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 359.01470947265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 359.032470703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.95135498046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 359.0386047363281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 358.8933410644531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 358.8590087890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 358.9281921386719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 359.1009826660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 358.9183044433594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 358.7223815917969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 358.8534851074219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 359.07220458984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 358.99609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 358.8880615234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 358.88421630859375
############ Running episode number: 838  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 359.006103515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 358.7431945800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 359.1038513183594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 359.164794921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 358.6429748535156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 358.9762268066406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 358.9620666503906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 359.08892822265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 358.9397888183594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 358.9744873046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 358.95458984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 358.9350891113281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 358.772705078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 359.0505065917969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 359.0224914550781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 358.8018798828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 359.0087890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 358.8944396972656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 359.11041259765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 358.9824523925781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 358.8273010253906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 358.9743347167969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 358.8130798339844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 358.94647216796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 358.9405822753906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 359.017822265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 359.05523681640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 359.0373840332031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 358.9186096191406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 358.90850830078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 359.0145568847656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 359.1125793457031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 359.11376953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 358.7764587402344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 359.0549011230469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 358.89642333984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 358.6941223144531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.98773193359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 358.7576904296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 359.1751708984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 358.9556579589844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 359.1815185546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 358.9330139160156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 359.3349609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 358.98321533203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 358.90167236328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 359.04486083984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 358.9726257324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 358.9619140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 358.87957763671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 359.38775634765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 358.8553161621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 358.9408874511719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 358.9253845214844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 358.9308166503906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 359.1965637207031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 358.8841857910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 359.0228576660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 358.83563232421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 358.9483642578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 358.9385681152344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 358.6412353515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 358.85601806640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 359.0119323730469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 359.061279296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 358.7962951660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 358.9963073730469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 358.8468933105469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 358.9682312011719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 359.1462097167969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 359.110107421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 358.81005859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 358.95428466796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 358.9461975097656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 359.0804138183594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 358.9007873535156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 358.9545593261719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 359.0843505859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.7371826171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 359.0614318847656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 358.9580078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 358.99029541015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 358.8692321777344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 359.0462341308594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 359.08575439453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.6981506347656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 359.2274475097656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.8467102050781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.9126892089844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 358.98773193359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 358.894775390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 359.2292785644531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 359.2207946777344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 358.95782470703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 358.7663879394531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 359.0050048828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 359.0288391113281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 358.82073974609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 358.95538330078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 359.1499938964844
############ Running episode number: 839  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.95562744140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 359.0908508300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 358.87921142578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 358.8818054199219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 358.9142761230469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 359.0341491699219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 358.92626953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 359.1101989746094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 359.0011291503906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 358.9689025878906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 359.0179748535156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 358.9917907714844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 358.81134033203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 359.1233825683594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 358.9073791503906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 359.0672302246094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 358.82769775390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 358.8203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 358.99810791015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 358.9380798339844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 358.87890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 359.0987854003906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 358.93280029296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 358.7456970214844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 358.9776611328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 358.98028564453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 359.0452575683594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 358.9062805175781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 358.82196044921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 358.89703369140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 358.95892333984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 359.0262145996094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 359.1571044921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 358.8836975097656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 358.9771728515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 358.92083740234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 358.71649169921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 359.2169494628906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 358.95318603515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 359.2940979003906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 358.9512939453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 359.1072692871094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 358.9579772949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 358.9129333496094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 358.9848937988281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 358.7996826171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 358.9643249511719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 359.2333068847656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 358.91485595703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 358.8599548339844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 359.0008850097656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 359.0061950683594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 358.9205627441406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 359.1398620605469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 359.13043212890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 358.89892578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 358.9080810546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 358.9237976074219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 358.7681884765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 358.99200439453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 358.9291076660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 358.7990417480469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 359.1027526855469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 358.94622802734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 359.08612060546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 358.8926696777344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 359.005126953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 359.0460205078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 358.6972351074219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 358.88653564453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 359.10528564453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 358.8699035644531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 359.0888671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 359.12481689453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 359.1610412597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 359.07794189453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 358.95928955078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 358.8070373535156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.812744140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 358.9391784667969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 359.1761169433594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 358.9419860839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 358.8981018066406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 359.1365966796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 358.8453369140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 359.0306701660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 358.9385986328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.9119873046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.9539794921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 358.9145812988281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 359.0273742675781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 358.9276123046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 358.88812255859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 358.9253845214844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 359.2303161621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 359.0289001464844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 359.0193786621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 358.7979736328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 359.0945739746094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 358.98712158203125
############ Running episode number: 840  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 359.0416564941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 358.9952392578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 358.9508361816406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 359.14312744140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 359.0556945800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 358.97052001953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 359.13623046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 358.9698486328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 358.8346252441406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 358.9827575683594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 358.9967346191406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 358.9375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 358.85211181640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 359.0611267089844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 358.9132080078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 358.7587890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 359.0729064941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 358.9749450683594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 359.06085205078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 359.0887145996094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 359.0111999511719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 358.9337158203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 358.93804931640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 358.95989990234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 358.9122009277344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 358.9627990722656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 358.7962646484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 358.8946228027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 358.94061279296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 359.1732177734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 358.8817138671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 358.9566955566406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 359.03082275390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 359.19378662109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 358.9570617675781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 358.93890380859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 358.75091552734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.89923095703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 358.9198913574219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 358.951416015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 358.9610900878906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 358.94610595703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 359.035400390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 359.02508544921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 358.96356201171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 358.8717041015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 358.8236999511719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 358.8150634765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 358.94012451171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 358.8109436035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 358.7357177734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 358.99151611328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 359.0395202636719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 359.0538024902344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 358.8943786621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 359.0055847167969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 358.92254638671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 358.9767761230469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 359.09588623046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 358.83807373046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 358.6967468261719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 358.8185729980469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 358.9773254394531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 358.9819641113281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 358.7868957519531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 358.95904541015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 358.96685791015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 359.07818603515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 359.0188903808594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 359.0239562988281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 358.8582763671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 358.94464111328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 358.9619445800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 358.8887939453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 358.8804931640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 359.0582580566406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 358.76025390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 358.9708557128906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.5955810546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 358.91827392578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 359.0561828613281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 358.59375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 359.1543273925781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 359.0406494140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 358.7887878417969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 359.1936340332031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 358.8483581542969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.9595947265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 359.04449462890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 358.8135986328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 358.8075256347656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 359.0000915527344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 359.1030578613281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 359.03643798828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 358.98895263671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 358.97491455078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 358.9712829589844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 359.0987854003906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 359.109130859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 358.7465515136719
############ Running episode number: 841  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.8472595214844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 359.0500793457031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 358.86004638671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 358.8891296386719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 358.9804382324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 358.9552307128906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 358.9283752441406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 358.9699401855469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 358.9939880371094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 359.0142822265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 359.1145324707031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 358.9737548828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 358.8241882324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.9609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 358.9881286621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 358.827392578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 359.01171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 359.18115234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 358.9255676269531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 358.8918151855469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 359.0975646972656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 359.3580017089844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 359.021484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 358.9436950683594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 358.6828918457031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 359.0594482421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 359.01336669921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 359.2436828613281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 359.00274658203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 359.1864318847656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 358.8984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 358.9037780761719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 358.9079284667969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 359.05731201171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 358.881591796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 358.9717102050781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 359.12744140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.9551696777344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 359.0195617675781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 358.9003601074219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 358.87066650390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 359.00933837890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 358.9128723144531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 358.88372802734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 358.84521484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 359.01715087890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 358.86572265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 359.12750244140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 359.0003662109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 359.1277770996094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 358.9461364746094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 358.9358215332031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 359.0626220703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 358.9825134277344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 358.7552490234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 359.0960998535156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 358.8850402832031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 359.11065673828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 358.9471740722656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 359.1363220214844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 358.9184265136719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 358.9620056152344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 358.8968505859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 359.0071105957031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 358.8791809082031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 359.0899658203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 358.9932861328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 359.0855712890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 359.0780334472656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 358.9644470214844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 359.0062561035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 359.114013671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 359.045654296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 359.03363037109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 358.7877502441406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 358.8153076171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 359.0893249511719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 359.0058288574219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.9132080078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 359.062744140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 359.01580810546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 358.93280029296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 358.9603576660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 359.0340881347656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 358.9082336425781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.88616943359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 358.8944396972656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.8811340332031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.9587707519531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 358.71771240234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 358.9580993652344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 358.8416748046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 358.8616638183594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 358.94940185546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 359.01666259765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 359.0386962890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 358.97711181640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 358.99835205078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 358.909912109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 358.9554748535156
############ Running episode number: 842  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 359.09893798828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 358.92987060546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 358.9383850097656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 359.04986572265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 359.130859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 358.91259765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 359.0038757324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 358.8116455078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 359.07073974609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 359.0841979980469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 358.8588562011719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 358.77362060546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 359.13482666015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.9872131347656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 358.8670654296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 359.0771484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 358.9078674316406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 358.9973449707031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 358.7431335449219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 358.898681640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 359.1324462890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 358.8269348144531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 359.1143493652344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 358.8005676269531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 359.0142517089844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 359.0786437988281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 358.8230285644531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 358.9897766113281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 359.0036926269531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 359.0149230957031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 359.1922302246094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 358.9035339355469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 358.9348449707031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 358.8162536621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 358.745361328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 359.1126403808594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 358.8297424316406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 359.0747985839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 358.8028869628906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 358.9499816894531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 358.8388977050781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 358.95086669921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 358.6701354980469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 358.9700927734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 358.9258117675781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 358.9446716308594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 359.06988525390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 358.9443359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 358.982666015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 359.0046081542969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 359.0705261230469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 358.7803039550781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 358.9368896484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 359.30096435546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 359.0060119628906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 358.85882568359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 358.9607238769531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 358.9083557128906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 359.1756286621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 358.9996337890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 358.92913818359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 358.6519470214844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 358.77288818359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 359.0218505859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 359.0760192871094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 358.831298828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 359.12030029296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 358.90362548828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 358.8418273925781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 358.8501281738281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 358.94024658203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 358.93634033203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 358.998046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 358.79693603515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 359.1983337402344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 359.0658874511719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 359.0649108886719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 358.9862060546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.9139099121094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 359.0871887207031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 359.2067565917969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 358.9935607910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 359.1421203613281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 359.1420593261719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 358.85577392578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.9674987792969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 359.0504455566406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.9809875488281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.9263000488281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 358.82220458984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 358.958251953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 358.96612548828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 359.0455017089844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 359.1080627441406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 358.99822998046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 358.90875244140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 358.9986572265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 358.81640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 358.9770202636719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 359.1158752441406
############ Running episode number: 843  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.914306640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 358.928466796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 358.9320983886719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 358.956298828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 358.9213562011719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 359.00299072265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 358.89691162109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 358.6822509765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 359.0094299316406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 358.94207763671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 358.8303527832031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 358.9538269042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 358.8157653808594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 359.12640380859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 358.80523681640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 358.7268981933594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 359.0770263671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 358.9673767089844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 359.0035400390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 358.9085693359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 359.0059814453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 359.0801086425781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 359.04046630859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 359.05999755859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 359.1019592285156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 358.93218994140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 359.0790710449219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 359.03717041015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 358.8390197753906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 358.9909362792969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 358.89105224609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 359.03350830078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 358.96661376953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 358.9774169921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 359.1085510253906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 359.16302490234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 359.11004638671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.92584228515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 358.9901123046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 358.842041015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 358.97869873046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 358.9574890136719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 358.9729309082031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 359.12725830078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 358.7774963378906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 358.8968505859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 359.0044860839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 358.85137939453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 358.955810546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 358.85064697265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 359.04583740234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 359.0274353027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 358.95208740234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 358.84344482421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 358.81207275390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 359.032958984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 358.9370422363281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 359.00946044921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 358.7671203613281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 359.061279296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 358.9917907714844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 358.9652404785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 359.1160583496094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 358.91986083984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 359.0416564941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 358.9136047363281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 358.9574279785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 359.0176086425781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 358.9915771484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 359.0151672363281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 358.8560485839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 359.0335388183594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 358.9017639160156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 358.6915283203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 359.0443115234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 358.9362487792969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 359.06524658203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 358.9852600097656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.92822265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 358.9117736816406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 359.0451354980469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 358.87982177734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 359.04766845703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 358.8008728027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 359.0188903808594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.9643859863281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 359.0317687988281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.92755126953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 359.1628112792969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 359.06951904296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 358.7584228515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 359.1655578613281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 358.95550537109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 358.9626770019531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 358.998046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 358.9861755371094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 358.8130798339844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 358.9111022949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 359.245361328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 358.833984375
############ Running episode number: 844  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 359.04754638671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 358.98663330078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 359.03680419921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 359.0005798339844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 358.8428039550781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 358.7154235839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 359.1562805175781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 358.7467346191406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 358.9604187011719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 358.9645080566406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 359.0229797363281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 359.0513610839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 358.8353271484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.9590759277344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 358.9156799316406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 358.9967956542969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 358.7322082519531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 358.9330139160156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 359.1610412597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 358.8965148925781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 358.6630859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 358.95758056640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 359.0163879394531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 359.1260681152344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 358.7724609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 358.82196044921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 358.433837890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 358.7980041503906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 358.9173583984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 358.94268798828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 358.8544616699219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 359.1124267578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 358.9303283691406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 359.14251708984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 358.9843444824219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 358.7872619628906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 358.9183654785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.82073974609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 359.0985107421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 358.82476806640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 358.7152099609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 358.65325927734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 359.09967041015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 358.9281311035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 359.110107421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 358.79681396484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 359.15728759765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 359.0911865234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 358.857421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 358.9664306640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 359.0404968261719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 358.9505920410156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 359.0937194824219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 359.05206298828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 359.0281677246094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 358.870849609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 359.09600830078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 359.0316467285156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 358.9114990234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 359.1631774902344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 359.08056640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 359.0494079589844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 359.0003356933594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 358.937255859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 359.0174255371094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 358.86041259765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 359.3257751464844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 358.82086181640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 358.95794677734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 358.9718933105469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 359.0066223144531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 358.9687805175781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 358.9521484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 358.8933410644531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 358.82354736328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 358.92474365234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 358.9880676269531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 358.9349365234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.818115234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 359.1665954589844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 358.8763427734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 358.8946533203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 358.8770446777344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 359.1106872558594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 359.1491394042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.95574951171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 358.94647216796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.9859313964844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.8412170410156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 358.93023681640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 358.9017333984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 359.0855407714844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 358.9464416503906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 358.8990173339844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 358.88153076171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 359.0715637207031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 358.68829345703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 358.7818603515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 358.9971923828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 359.0042419433594
############ Running episode number: 845  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.7483825683594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 358.9664306640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 358.9234924316406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 359.0058288574219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 359.0533447265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 358.97802734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 358.88897705078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 358.9109802246094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 358.9652099609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 358.8996276855469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 358.92041015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 358.9319152832031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 358.7461242675781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 359.0729064941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 359.07171630859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 358.9399719238281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 359.1815490722656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 358.9247131347656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 359.1722412109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 359.0582275390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 359.03424072265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 359.10052490234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 358.956298828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 358.95477294921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 359.14288330078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 359.0162658691406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 359.1206970214844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 358.9345397949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 359.0717468261719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 359.0196838378906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 358.9899597167969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 358.9260559082031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 358.8351135253906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 358.9511413574219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 358.71148681640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 359.1144714355469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 359.04888916015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.7213134765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 358.6830139160156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 359.0557556152344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 359.04473876953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 358.9210510253906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 359.0540466308594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 358.9028625488281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 358.98321533203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 359.1595764160156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 359.026123046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 359.00030517578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 358.9447937011719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 359.12030029296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 359.0973815917969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 358.9339599609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 358.81427001953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 359.0551452636719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 358.942626953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 359.0313720703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 358.96685791015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 359.00787353515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 358.84600830078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 359.0480651855469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 358.899169921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 358.9770812988281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 359.0415954589844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 359.1203918457031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 358.830810546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 359.0240478515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 358.8100891113281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 359.0711975097656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 358.9772644042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 358.9453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 359.20855712890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 359.0072326660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 358.882080078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 358.9389953613281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 359.0860595703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 359.121337890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 358.9029541015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 358.9759216308594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.8908386230469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 358.85906982421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 359.03118896484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 359.0327453613281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 358.6283264160156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 359.081787109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 358.9238586425781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.767822265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 358.88116455078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.9253845214844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.9607849121094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 359.17535400390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 358.8424377441406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 359.0982971191406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 358.84698486328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 358.9063415527344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 358.89495849609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 358.9471740722656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 359.0398254394531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 358.7842102050781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 358.744384765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 358.83355712890625
############ Running episode number: 846  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.8548889160156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 359.0014953613281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 359.01837158203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 358.96875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 358.77044677734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 358.8517150878906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 359.1299133300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 359.0666198730469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 358.9372863769531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 359.05816650390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 358.8079528808594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 358.608642578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 358.9730529785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.8399963378906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 359.1500549316406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 359.04693603515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 358.8304748535156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 358.9019775390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 358.80987548828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 358.9486083984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 358.84027099609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 358.9537353515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 358.9419860839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 358.9205017089844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 359.08709716796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 359.1536865234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 358.9127197265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 358.9439697265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 358.7787170410156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 358.90576171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 358.7977294921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 358.87054443359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 358.9787292480469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 359.0207214355469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 358.90911865234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 358.9225769042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 359.0044860839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 359.1065673828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 358.7332763671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 359.06695556640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 359.28057861328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 359.0863342285156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 359.0441589355469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 358.837890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 358.9207763671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 359.05950927734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 359.14923095703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 358.91180419921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 359.11126708984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 358.60784912109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 359.1394348144531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 358.9308776855469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 359.2000732421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 358.93377685546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 358.7392883300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 358.8852233886719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 358.9221496582031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 359.095947265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 359.0797119140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 359.1731872558594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 359.1654357910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 358.9026184082031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 358.7860412597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 358.78619384765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 359.20989990234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 358.8192138671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 358.89080810546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 358.9109191894531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 358.968505859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 358.90655517578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 358.8799743652344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 358.77288818359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 359.0103759765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 359.17510986328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 358.771240234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 358.8706359863281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 359.1366882324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 358.8652038574219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 359.1657409667969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 359.06219482421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 358.9708557128906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 359.05584716796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 358.7529602050781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 358.9469299316406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 359.0194091796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.9241943359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 358.99798583984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.88409423828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.97308349609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 358.7127685546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 359.0234069824219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 358.8742980957031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 359.02874755859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 358.9041748046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 359.0159606933594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 358.9062805175781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 358.7893981933594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 359.11761474609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 359.0822448730469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 358.94781494140625
############ Running episode number: 847  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.857421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 359.0384216308594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 358.8601379394531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 359.0110778808594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 358.8241271972656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 358.9143981933594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 359.16461181640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 359.019287109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 359.1294860839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 358.99139404296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 358.9792175292969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 358.8080749511719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 358.99017333984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.87982177734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 358.9036560058594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 359.075439453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 359.0520935058594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 359.00445556640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 358.884521484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 359.0204162597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 358.91705322265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 358.9848937988281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 358.76708984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 358.6850891113281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 358.8092041015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 359.0254821777344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 359.0809631347656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 359.09149169921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 358.7926940917969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 358.8499755859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 358.9466857910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 358.7143249511719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 358.69873046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 359.04815673828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 358.958740234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 358.70855712890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 358.9645690917969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.7928771972656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 359.1210021972656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 359.12652587890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 358.9349060058594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 358.68359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 358.7506408691406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 359.20892333984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 358.937255859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 358.89984130859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 359.0016784667969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 358.83837890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 358.8112487792969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 358.9510192871094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 359.0301818847656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 358.8682556152344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 358.85845947265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 359.15008544921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 359.01348876953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 359.0055847167969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 358.70928955078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 358.8291015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 358.8338623046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 358.9267578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 358.8230895996094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 358.8525085449219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 358.9981689453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 359.0162048339844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 359.0067443847656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 359.1949768066406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 359.0790100097656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 358.9358825683594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 358.8985595703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 358.9202575683594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 358.9463806152344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 359.11663818359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 359.1321105957031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 358.97412109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 358.9433898925781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 358.95013427734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 358.94329833984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 358.9942321777344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.9916687011719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 359.16278076171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 358.94268798828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 359.0519104003906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 358.74884033203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 358.9150085449219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 359.05615234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.76898193359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 358.7678527832031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.76849365234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.5947265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 358.7586669921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 359.0827331542969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 359.0039978027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 359.17645263671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 358.897216796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 358.9328308105469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 358.861572265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 358.85980224609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 359.0682373046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 358.9482421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 358.8435974121094
############ Running episode number: 848  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 359.0323181152344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 358.97113037109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 358.8810119628906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 358.8716735839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 359.0817565917969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 358.7671813964844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 358.9930725097656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 358.7644958496094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 358.99896240234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 358.9337463378906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 359.0534362792969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 359.13946533203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 358.73291015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.92926025390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 358.8331298828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 359.14306640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 359.0373229980469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 358.8116149902344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 358.9482421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 359.15447998046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 359.0186462402344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 359.0666809082031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 358.9939880371094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 358.907958984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 358.7908020019531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 358.7872314453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 358.8726806640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 359.2003479003906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 358.9213562011719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 358.8829345703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 358.8841247558594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 358.9704284667969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 359.0354919433594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 359.0228576660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 359.0101013183594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 359.0343322753906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 359.01568603515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.7602233886719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 358.8031311035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 358.68463134765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 358.9552917480469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 359.02069091796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 358.84405517578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 358.98614501953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 358.9091796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 358.9513854980469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 358.8639831542969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 358.9691467285156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 358.80035400390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 359.1061096191406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 358.7466735839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 359.0126647949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 358.7467956542969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 359.0951843261719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 358.9554138183594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 359.0028381347656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 358.9403991699219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 359.0308532714844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 358.9907531738281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 359.0013732910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 358.8440856933594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 359.0415954589844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 358.95245361328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 358.90582275390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 359.2130432128906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 358.650634765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 358.990966796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 359.03070068359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 358.9287109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 358.9429931640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 359.1279296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 358.6352233886719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 358.87530517578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 359.1273498535156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 358.9208984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 358.9111022949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 358.9857482910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 358.86724853515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 359.0764465332031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 359.0458984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 358.8514404296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 358.9490051269531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 358.9366760253906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 359.1007995605469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 358.9634094238281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.7872009277344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 359.0733947753906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 359.25384521484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 359.039306640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 359.01300048828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 358.9463195800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 359.0198059082031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 358.8348388671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 359.19830322265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 358.878662109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 358.903564453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 358.92352294921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 358.8778076171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 358.787841796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 358.9832458496094
############ Running episode number: 849  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.9017028808594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 358.9178771972656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 359.02777099609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 359.0953674316406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 359.02239990234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 359.03118896484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 359.05865478515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 358.9092712402344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 358.71380615234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 358.9949645996094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 358.9674377441406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 358.9033508300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 358.9235534667969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.7427673339844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 359.1744079589844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 359.1380310058594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 358.9655456542969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 359.0107727050781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 359.038818359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 358.8861083984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 359.0154724121094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 358.8171081542969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 358.8304443359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 358.95257568359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 359.0304870605469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 359.1855163574219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 358.9013366699219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 358.96502685546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 358.8860168457031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 358.92193603515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 358.99725341796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 358.8435363769531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 359.1498718261719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 358.7795715332031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 359.14776611328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 359.1697082519531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 358.95611572265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.91796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 358.84222412109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 358.9322204589844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 358.8897399902344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 358.8146667480469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 358.97100830078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 358.9333801269531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 359.0282897949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 358.8097229003906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 359.3504638671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 358.9035339355469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 358.98388671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 358.7928466796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 358.8189392089844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 358.8811950683594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 359.06207275390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 358.7975158691406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 358.83673095703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 358.9184265136719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 358.84832763671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 358.9058837890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 358.93243408203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 358.9340515136719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 359.1002197265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 359.0166931152344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 359.0713806152344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 358.91180419921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 359.1524963378906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 358.91162109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 358.8480529785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 358.87969970703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 359.1040344238281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 358.8729248046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 359.0414123535156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 358.9739685058594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 358.9996337890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 358.95391845703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 359.0220947265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 358.97540283203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 358.9871826171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 359.04034423828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.8614501953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 358.9128723144531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 359.0504455566406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 358.8953552246094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 359.04449462890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 358.87994384765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 358.93011474609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.8731994628906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 358.9397277832031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.96368408203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.9374694824219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 358.9635314941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 359.0976867675781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 358.8840026855469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 359.3016662597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 358.7748107910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 358.82275390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 358.7953186035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 358.7754821777344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 358.8678894042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 359.0372009277344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 358.8752136230469
############ Running episode number: 850  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.98065185546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 358.993408203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 359.0926208496094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 358.89385986328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 358.82696533203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 358.97625732421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 358.937255859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 359.00811767578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 359.1272888183594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 358.89154052734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 358.9849548339844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 358.9087829589844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 359.3011779785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 359.19866943359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 358.7608642578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 359.1878356933594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 358.7120056152344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 358.9068603515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 359.0003662109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 359.1370544433594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 358.89654541015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 358.8456115722656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 359.2182312011719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 358.8869323730469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 359.08221435546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 358.76910400390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 358.9303283691406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 359.00830078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 358.9103088378906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 358.9716796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 358.90032958984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 358.5870666503906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 358.9402160644531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 359.3393249511719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 358.8668212890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 358.9869384765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 358.9259338378906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.8641052246094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 358.969970703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 359.04443359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 358.97564697265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 359.02398681640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 358.80572509765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 358.75592041015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 358.9270324707031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 358.96563720703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 358.9718017578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 359.1700439453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 359.0562438964844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 359.1109619140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 358.94744873046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 359.0505065917969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 359.0307922363281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 359.079345703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 359.1841735839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 359.15777587890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 358.8917541503906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 359.0898742675781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 359.00775146484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 359.1304626464844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 358.8355407714844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 358.93902587890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 359.0250549316406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 359.0401306152344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 358.8600769042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 358.8819274902344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 358.78411865234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 358.872314453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 358.9310607910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 359.3465881347656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 358.8383483886719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 358.8154296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 358.91107177734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 358.98486328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 358.8304443359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 358.8651123046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 359.0201721191406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 358.80560302734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.8523864746094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 358.7920227050781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 358.7579345703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 359.0550842285156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 358.7698974609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 359.0813903808594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 358.9905700683594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 359.0680236816406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 358.83135986328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.7250671386719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 359.0020446777344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 358.9631042480469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 359.1936340332031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 359.165771484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 359.17095947265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 359.0610046386719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 358.86395263671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 358.9019470214844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 359.08203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 359.05181884765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 359.00982666015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 358.888916015625
############ Running episode number: 851  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.9633483886719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 359.11578369140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 358.9812927246094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 358.9841613769531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 359.01690673828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 358.69952392578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 359.08843994140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 359.0713195800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 358.8873596191406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 358.9261169433594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 359.0589904785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 358.9053955078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 359.05126953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 359.02178955078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 358.9422607421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 358.9861145019531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 358.82293701171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 358.970458984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 358.9485778808594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 358.875732421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 359.0158996582031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 359.2508544921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 359.0928039550781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 359.1712341308594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 359.0991516113281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 358.9609680175781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 359.1073913574219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 358.84967041015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 358.92059326171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 358.930419921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 359.0233459472656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 358.9458923339844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 358.79248046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 358.8412170410156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 359.021728515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 359.0999450683594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 358.865478515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 359.05194091796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 358.97393798828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 358.9447326660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 359.1179504394531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 358.6542663574219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 358.99139404296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 358.86962890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 358.947265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 358.9650573730469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 359.0984802246094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 359.00555419921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 358.8332824707031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 358.8523254394531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 359.0497741699219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 358.83563232421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 358.6782531738281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 359.1361083984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 358.8721008300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 358.92626953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 358.8359069824219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 358.98431396484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 359.122802734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 359.0494384765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 358.8217468261719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 358.77459716796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 359.08160400390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 358.7559814453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 358.8703308105469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 358.93731689453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 359.0296630859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 358.9981994628906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 359.0461730957031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 358.75213623046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 359.1246337890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 358.6483459472656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 358.9018859863281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 359.1255798339844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 358.8862609863281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 359.0654602050781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 358.8675231933594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 359.0054016113281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.9652404785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 358.92694091796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 359.0252990722656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 358.944091796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 358.6172180175781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 359.02496337890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 359.10028076171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 359.18914794921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 359.0040283203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.7976379394531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 359.0619201660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 359.0299377441406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 359.00189208984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 358.8117980957031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 358.89013671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 358.95794677734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 358.9774475097656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 358.83233642578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 358.93402099609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 359.0358581542969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 358.7488708496094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 358.90399169921875
############ Running episode number: 852  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 359.1172180175781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 358.99615478515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 358.9336242675781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 359.0716247558594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 358.9335021972656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 359.1105041503906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 359.0523376464844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 359.2430725097656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 358.8245544433594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 358.81280517578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 358.69793701171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 359.0227355957031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 359.06597900390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.982177734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 358.9527282714844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 358.81451416015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 359.0884094238281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 359.0254211425781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 359.0014953613281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 358.9881896972656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 358.9864807128906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 358.91864013671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 358.68280029296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 358.6925354003906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 359.0030517578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 358.85955810546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 358.90869140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 359.13739013671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 358.87567138671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 359.2151794433594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 359.01263427734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 359.2568664550781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 358.8697509765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 358.9560852050781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 358.8825988769531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 358.7837219238281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 358.95684814453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 359.0135192871094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 359.05242919921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 359.0181579589844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 358.8566589355469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 359.0344543457031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 358.6151428222656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 358.8901062011719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 358.9047546386719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 359.1553955078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 358.9317321777344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 358.9386291503906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 359.0126037597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 358.7799377441406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 358.9880676269531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 358.82952880859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 359.04693603515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 359.0839538574219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 359.11688232421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 358.51019287109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 359.01318359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 358.81231689453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 358.9866943359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 358.81024169921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 359.1127014160156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 358.9931335449219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 359.2185363769531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 359.1031494140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 359.002685546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 358.96795654296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 358.7988586425781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 358.9842224121094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 359.0328369140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 358.9172058105469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 358.64569091796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 359.02386474609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 358.8498229980469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 358.8719177246094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 358.7884826660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 359.0086669921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 358.97613525390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 358.81927490234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 359.0289611816406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 358.9921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 359.1212463378906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 358.87115478515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 359.0922546386719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 359.0170593261719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 358.7195739746094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.8782653808594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 359.0235595703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.9325256347656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.93359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 358.8520202636719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 358.9964599609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 358.68988037109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 359.1990661621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 359.0367736816406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 359.1897888183594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 359.18377685546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 359.00335693359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 359.0360107421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 358.9472351074219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 359.0010070800781
############ Running episode number: 853  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 359.0677795410156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 358.8651123046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 358.81121826171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 358.8606262207031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 358.9646911621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 358.84844970703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 359.0934753417969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 358.8111877441406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 358.96038818359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 358.88323974609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 359.0609130859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 359.01971435546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 358.91351318359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.8711853027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 358.9734802246094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 358.93231201171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 359.07110595703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 358.8341064453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 359.0098571777344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 358.9571838378906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 358.98779296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 359.1446228027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 358.95001220703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 359.0127258300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 359.0005798339844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 359.12335205078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 359.1343994140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 358.9337463378906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 358.9653015136719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 359.10076904296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 358.8719177246094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 358.6377868652344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 358.8827209472656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 359.0263366699219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 358.9068603515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 358.86004638671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 358.9396057128906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.81256103515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 358.8445739746094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 359.0333251953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 358.967041015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 358.64556884765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 359.0023498535156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 359.0279541015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 358.6326904296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 358.9754638671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 358.8819885253906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 358.89447021484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 358.9955749511719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 358.9232177734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 358.8782043457031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 359.3487854003906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 358.9682922363281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 359.0526123046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 359.0096435546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 358.9016418457031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 359.2054748535156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 358.9252624511719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 358.947265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 358.93829345703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 358.9955139160156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 358.88458251953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 358.97491455078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 358.6861572265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 358.9566650390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 358.9424743652344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 358.8196716308594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 359.1980895996094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 358.9472961425781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 358.8600158691406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 359.0272521972656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 358.6497802734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 359.2066955566406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 358.8761291503906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 359.0156555175781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 359.0056457519531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 359.0924377441406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 358.6357421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 359.015869140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 359.1214904785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 359.02398681640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 358.9949035644531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 358.8375244140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 358.88348388671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 358.9284973144531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 359.01611328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 358.7818908691406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 359.0682067871094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.9534606933594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 359.1095886230469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 359.15020751953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 358.8639831542969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 359.0679016113281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 358.8967590332031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 358.8750915527344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 359.0956726074219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 358.70086669921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 359.17425537109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 358.8077392578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 358.70184326171875
############ Running episode number: 854  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 359.1841125488281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 358.9713134765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 358.8902282714844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 359.1428527832031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 358.7442321777344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 358.87786865234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 359.0519714355469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 358.7475280761719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 358.86846923828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 359.0039367675781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 358.9023132324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 358.95098876953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 358.9383850097656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.8762512207031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 359.2705078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 358.89093017578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 358.9598693847656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 358.79095458984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 358.8291931152344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 358.87701416015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 358.6612548828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 358.9311218261719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 359.0956115722656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 359.0120544433594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 358.85113525390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 358.8406677246094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 358.9096984863281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 359.0527648925781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 358.84521484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 359.099853515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 358.8835144042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 359.0905456542969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 358.986572265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 358.89984130859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 359.1374206542969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 359.030517578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 359.0047607421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.8123779296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 359.02667236328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 358.9422912597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 358.8485412597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 359.2022705078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 358.8857727050781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 359.002197265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 359.1877746582031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 358.8758239746094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 358.94012451171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 359.1396789550781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 359.0086669921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 358.88922119140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 358.9331970214844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 358.9768981933594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 358.9054260253906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 358.90576171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 359.0057373046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 358.91845703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 358.9040832519531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 358.8539733886719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 358.8251037597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 358.7617492675781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 358.9515075683594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 358.95904541015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 359.1202087402344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 359.06781005859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 358.9736328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 358.8166198730469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 358.8222961425781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 358.91326904296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 358.69580078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 359.060546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 358.82177734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 358.910400390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 358.9860534667969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 358.83056640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 358.9986572265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 359.2607421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 358.9329833984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 359.17095947265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 359.09918212890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 358.8980407714844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 358.901611328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 358.7967834472656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 358.97613525390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 359.09014892578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 359.2488098144531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.86541748046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 358.9253234863281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 359.054443359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.7328186035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 358.884765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 358.9204406738281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 358.94134521484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 358.74530029296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 359.072509765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 358.9422912597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 358.9865417480469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 358.64996337890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 359.0472412109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 359.1396789550781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 358.929443359375
############ Running episode number: 855  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.9620361328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 359.0169982910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 358.9092102050781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 358.84375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 359.0408935546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 358.8772277832031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 358.96917724609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 358.86566162109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 358.91815185546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 359.0899658203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 358.9859619140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 359.23590087890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 358.8758239746094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.8912353515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 358.9163818359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 358.9469299316406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 358.7634582519531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 358.9797668457031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 358.6748352050781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 358.88970947265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 359.09075927734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 358.86676025390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 358.88153076171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 358.97930908203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 359.01019287109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 358.9787902832031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 358.8714904785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 358.89599609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 358.68115234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 359.0944519042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 358.9644775390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 358.88702392578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 358.8065490722656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 358.961669921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 358.893798828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 358.8443298339844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 358.88720703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.92962646484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 359.0440673828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 358.89080810546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 358.7341003417969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 359.0213623046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 358.9786071777344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 358.9559326171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 358.8278503417969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 358.6412658691406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 358.9996032714844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 358.8128356933594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 359.05419921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 358.880859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 359.0148010253906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 358.930419921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 358.950927734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 358.8653869628906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 358.97918701171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 358.8288879394531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 359.0001525878906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 358.96319580078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 358.97344970703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 358.9751892089844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 358.9876708984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 358.90960693359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 358.9521179199219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 359.0594482421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 358.9696044921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 358.7666320800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 359.01123046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 358.96551513671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 358.9750671386719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 358.76153564453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 359.03216552734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 359.0403747558594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 358.86212158203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 358.9664306640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 358.8244934082031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 359.01824951171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 358.85125732421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 358.6844177246094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.9502258300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 358.9162902832031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 359.1083068847656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 359.08648681640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 358.9638366699219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 359.034912109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 358.9667663574219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 359.0582275390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 358.9200439453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.82073974609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 359.0157165527344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 358.9037170410156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 359.0584716796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 359.0061950683594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 358.9701232910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 358.8334655761719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 358.9349670410156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 359.0868835449219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 359.1605529785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 358.90704345703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 359.10150146484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 359.0008850097656
############ Running episode number: 856  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.963134765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 358.90557861328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 358.893310546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 358.82635498046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 358.9622802734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 359.1086120605469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 358.91571044921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 359.21435546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 358.9806213378906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 358.9861755371094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 358.96929931640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 359.06951904296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 359.0421447753906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.93927001953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 358.8542785644531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 359.0472717285156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 358.97747802734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 358.844970703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 358.9341735839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 358.7995910644531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 358.87750244140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 358.9037780761719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 358.8829040527344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 358.9728698730469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 359.1689147949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 358.7545471191406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 358.9784240722656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 358.92822265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 358.73876953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 358.9098815917969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 358.85418701171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 358.9130859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 359.2283020019531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 358.92425537109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 358.89874267578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 359.1173400878906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 358.9076843261719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.734619140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 358.9554443359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 358.7503662109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 359.0043640136719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 358.8339538574219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 359.01202392578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 359.07769775390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 359.130615234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 359.0313720703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 359.0812683105469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 358.7361145019531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 359.0621032714844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 359.0763854980469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 359.0230712890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 358.8227844238281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 358.8958740234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 358.7805480957031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 358.8514404296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 358.83270263671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 358.9794921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 359.096923828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 358.8382873535156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 358.9949951171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 359.0229797363281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 359.1376037597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 359.078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 358.8730773925781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 358.8913269042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 358.86212158203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 358.8346862792969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 359.193603515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 359.02313232421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 359.1981201171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 358.8454284667969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 359.1299133300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 358.9195861816406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 359.0904235839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 358.95184326171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 359.0145568847656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 358.83245849609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 359.2451477050781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.9561767578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 359.10040283203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 358.82061767578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 358.86724853515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 359.0932312011719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 358.7860107421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 358.920654296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.9260559082031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 358.7565612792969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.9693298339844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.7982482910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 359.10333251953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 358.9151306152344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 358.88232421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 358.9056396484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 358.95843505859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 358.8166198730469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 358.8507385253906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 358.9879150390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 358.7008972167969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 359.0951843261719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 358.9820861816406
############ Running episode number: 857  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 359.08306884765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 359.0193176269531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 359.0451354980469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 358.9330749511719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 358.8186340332031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 358.9241027832031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 358.9816589355469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 359.06097412109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 358.935791015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 359.0478210449219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 358.9861145019531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 359.0667419433594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 358.9396057128906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.8121337890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 359.32421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 359.05560302734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 358.9129943847656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 359.19720458984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 359.0859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 358.9578857421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 358.9299011230469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 358.9215393066406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 359.0035400390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 358.9916687011719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 359.12060546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 359.2046203613281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 359.3056945800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 358.6806335449219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 359.0071716308594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 358.9759521484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 358.81304931640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 359.0649719238281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 359.10992431640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 358.9178466796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 358.9333801269531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 358.9988708496094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 358.9814758300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.9207458496094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 358.89410400390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 358.98974609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 358.891357421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 359.10784912109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 358.9027099609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 359.066162109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 358.82568359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 358.92840576171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 358.8233642578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 358.79254150390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 359.1351013183594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 358.99493408203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 358.994384765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 359.0043640136719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 359.0258483886719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 359.04010009765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 359.1795349121094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 358.9613342285156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 358.96453857421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 358.9107971191406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 359.28155517578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 358.9680480957031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 359.104248046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 358.80413818359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 358.9280090332031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 359.1077575683594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 359.0112609863281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 359.0771789550781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 358.9179382324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 359.0931396484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 359.0339050292969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 359.1196594238281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 358.9617614746094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 358.8459167480469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 358.93951416015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 359.0408935546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 358.8367919921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 359.1175842285156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 358.9921569824219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 359.1732177734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.7845764160156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 359.0169982910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 359.1517639160156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 358.9828186035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 358.7465515136719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 359.0361328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 359.1683349609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 359.0151672363281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 358.694091796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 359.0093688964844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.96875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 358.6966552734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 358.9941101074219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 359.0042724609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 358.8472900390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 358.96234130859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 358.9202880859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 358.9385070800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 358.8275451660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 359.0119323730469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 359.12139892578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 358.879638671875
############ Running episode number: 858  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 359.1057434082031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 358.915771484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 358.8094177246094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 359.1526794433594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 358.8659973144531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 358.8597412109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 358.9010314941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 359.10235595703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 359.0561828613281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 359.1095886230469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 358.995849609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 359.0562744140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 359.0758056640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.93011474609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 359.2500915527344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 358.8597412109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 359.01531982421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 358.728271484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 359.05963134765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 358.88348388671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 359.0807800292969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 359.0598449707031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 359.0356750488281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 359.0279541015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 358.9440612792969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 358.9967041015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 358.9203186035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 358.95550537109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 358.9737548828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 358.9549865722656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 359.1021423339844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 358.7550048828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 358.8599548339844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 359.0189514160156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 359.08551025390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 358.9161376953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 358.99029541015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 359.13330078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 358.89874267578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 358.81170654296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 359.0809326171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 359.0068054199219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 359.0564270019531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 358.84759521484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 359.01837158203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 359.02984619140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 358.9990539550781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 358.9221496582031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 359.15478515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 358.9678039550781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 359.11181640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 358.9368896484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 358.87652587890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 359.08953857421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 359.06976318359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 359.27587890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 358.7469482421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 359.06494140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 358.8861389160156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 359.0030212402344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 358.7736511230469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 358.9071960449219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 359.1165466308594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 358.9421081542969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 358.93756103515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 358.8592529296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 358.7516784667969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 358.9503173828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 359.11541748046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 358.9941711425781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 358.8106994628906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 358.7369079589844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 358.7475280761719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 358.9640197753906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 359.02850341796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 358.9478759765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 358.9850158691406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 359.1333312988281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.8193359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 359.04302978515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 358.9919128417969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 359.1399841308594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 358.892333984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 358.9237976074219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 358.9928283691406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.84735107421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 359.1147766113281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 359.02008056640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.8851013183594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 358.71343994140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 358.7760009765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 358.8630676269531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 359.0133972167969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 358.7397155761719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 358.89990234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 359.3287048339844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 358.7524108886719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 358.8629150390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 358.9655456542969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 359.0096435546875
############ Running episode number: 859  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.9142761230469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 358.9400329589844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 359.0072021484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 358.8381652832031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 359.0655212402344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 359.1963806152344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 358.73211669921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 358.88232421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 359.1672668457031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 359.04620361328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 359.0501708984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 358.75775146484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 359.118896484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 359.1629638671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 359.0711669921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 358.99383544921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 358.9190368652344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 359.24176025390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 358.8388977050781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 359.117919921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 358.978271484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 359.05291748046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 359.003662109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 358.8221435546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 359.0470275878906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 358.8111267089844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 359.0202941894531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 358.98870849609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 358.88983154296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 358.91607666015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 358.921142578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 358.90863037109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 358.9876708984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 358.88043212890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 358.888427734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 358.93377685546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 358.8481140136719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 359.041259765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 358.9844970703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 358.87762451171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 358.86383056640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 358.9945068359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 359.14239501953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 358.9163818359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 358.785400390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 358.97894287109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 359.0420837402344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 358.94781494140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 358.79400634765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 359.0751953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 358.8866271972656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 359.10040283203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 358.9885559082031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 359.0028991699219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 359.0526428222656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 358.7640075683594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 359.16925048828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 358.9806823730469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 358.7508239746094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 358.8468933105469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 359.0055236816406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 358.7145690917969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 358.97564697265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 359.1811218261719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 358.98114013671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 358.7956237792969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 358.93170166015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 358.86578369140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 359.02349853515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 359.1752014160156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 358.8891296386719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 358.9414367675781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 358.9795227050781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 358.8690185546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 358.96820068359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 359.1186218261719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 358.9091491699219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 359.0849914550781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 359.065185546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 359.0781555175781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 359.0956726074219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 358.9107971191406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 358.9324035644531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 358.8993225097656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 359.104248046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.8112487792969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 358.8295593261719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.9711608886719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.9800720214844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 358.92962646484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 359.15960693359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 358.9752197265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 358.9747314453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 358.9236755371094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 358.95556640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 359.1042175292969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 359.0425720214844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 359.1325378417969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 358.9785461425781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 358.8869323730469
############ Running episode number: 860  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 359.0439453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 358.78887939453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 358.85980224609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 358.9040832519531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 359.0780334472656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 358.93487548828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 358.8477783203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 358.9881591796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 358.9211120605469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 358.9574890136719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 358.9248046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 358.6978759765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 359.0270080566406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.93121337890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 358.81646728515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 359.2245178222656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 358.9869689941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 358.77008056640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 358.72039794921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 359.0322570800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 359.0157165527344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 358.9105529785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 359.0701904296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 359.0059509277344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 358.82403564453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 358.6899719238281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 358.93603515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 358.846435546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 359.04791259765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 358.9622802734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 358.9432678222656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 358.8156433105469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 359.1079406738281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 358.77362060546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 359.02276611328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 359.01055908203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 358.83380126953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.7454833984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 359.0792236328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 358.7179870605469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 358.9386291503906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 359.0969543457031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 358.8122863769531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 358.9906311035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 358.7298889160156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 358.8128662109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 358.87445068359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 358.8230285644531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 358.756103515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 358.8790588378906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 359.09539794921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 358.9869079589844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 359.3369445800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 358.9943542480469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 359.1914978027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 358.9623107910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 359.0892028808594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 359.0029296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 358.9454040527344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 359.0707092285156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 358.99530029296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 358.9122314453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 358.82257080078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 358.809326171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 358.9521484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 359.01312255859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 358.79498291015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 359.0150146484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 358.8412780761719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 358.7729187011719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 359.02117919921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 358.91156005859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 359.0688171386719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 358.9523620605469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 359.0360412597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 359.0194091796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 358.8333435058594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 358.87579345703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.91058349609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 358.8755798339844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 358.9889221191406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 358.86138916015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 359.0226135253906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 358.9902648925781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 359.1177978515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 359.0466613769531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 358.9012756347656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.83135986328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.9798278808594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 358.9180603027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 358.7724914550781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 358.814697265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 358.89288330078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 359.0582275390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 358.72125244140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 359.0184631347656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 358.97186279296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 359.0218811035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 358.972900390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 359.127197265625
############ Running episode number: 861  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 359.056884765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 359.072021484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 359.1253662109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 359.1792297363281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 359.15283203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 359.1496887207031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 358.84967041015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 359.1555480957031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 358.8398132324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 358.9534912109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 358.9248352050781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 358.9346008300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 359.01263427734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.8161926269531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 358.9801025390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 359.0469970703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 359.0774841308594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 359.1187744140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 358.74029541015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 359.064208984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 359.0783386230469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 358.770751953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 359.0580749511719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 358.6937255859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 358.9415283203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 358.9806823730469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 358.9517822265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 359.0142517089844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 358.9472961425781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 358.97833251953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 359.1353454589844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 358.8942565917969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 358.87969970703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 358.7638854980469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 358.886474609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 358.91168212890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 359.1306457519531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 359.01239013671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 358.79364013671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 358.8055419921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 358.8489074707031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 359.0127258300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 358.99578857421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 359.1023864746094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 359.03778076171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 359.2783203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 359.0630798339844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 358.8663024902344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 359.072509765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 358.91278076171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 358.96875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 358.8396911621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 359.0091552734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 358.7391662597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 358.9331359863281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 358.7887268066406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 359.0909118652344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 358.95318603515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 358.9105529785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 359.14288330078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 359.0579528808594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 359.11090087890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 358.9870300292969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 358.93768310546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 358.7197570800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 358.6610107421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 359.009033203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 358.9556579589844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 358.9791564941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 358.88616943359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 358.9425354003906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 358.9692077636719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 359.0758972167969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 359.0102233886719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 358.89984130859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 358.85205078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 358.8365478515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 358.78765869140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 359.0630798339844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 358.9732666015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 358.7253723144531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 358.8993225097656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 358.9413757324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 358.9496154785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 358.7737731933594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 359.076416015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 358.8787841796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.9494323730469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.9438781738281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 358.9930725097656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 358.8194274902344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 358.9322814941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 358.9263000488281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 359.0112609863281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 359.015869140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 358.9770202636719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 359.0584411621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 358.7695617675781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 359.0892028808594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 359.17999267578125
############ Running episode number: 862  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 359.097412109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 358.86834716796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 358.92169189453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 359.0779113769531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 358.8499450683594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 358.9761047363281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 359.001953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 358.8624572753906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 358.9263916015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 358.9311218261719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 358.97198486328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 358.9093017578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 359.0262145996094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 359.0443420410156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 358.8610534667969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 358.9021301269531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 358.7287902832031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 358.9768371582031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 358.9972839355469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 359.05084228515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 359.1907958984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 358.84185791015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 358.6497497558594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 358.9123229980469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 358.8824157714844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 358.8944091796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 358.7525329589844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 359.0942687988281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 358.9732971191406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 359.13873291015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 358.9979553222656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 359.0669250488281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 358.8992919921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 359.1173095703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 359.210693359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 358.78619384765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 359.10858154296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.9385681152344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 358.8511962890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 359.0093688964844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 358.9894714355469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 358.8166809082031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 358.9926452636719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 359.08660888671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 359.15032958984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 359.08489990234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 359.0015869140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 359.1063232421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 359.02978515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 359.2015686035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 359.0874328613281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 358.9679870605469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 359.2033996582031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 359.0465393066406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 358.99163818359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 359.1511535644531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 358.95501708984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 358.8782653808594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 358.8470764160156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 358.85479736328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 358.971923828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 358.98187255859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 359.0205383300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 358.9764709472656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 358.9653625488281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 359.0498046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 359.064453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 358.75518798828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 359.0091857910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 358.8832092285156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 358.74652099609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 358.8430480957031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 358.9504089355469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 358.9772644042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 359.199951171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 358.9497985839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 358.8752746582031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 358.8865966796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.99066162109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 358.8279113769531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 359.064453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 359.0505676269531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 358.9632263183594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 359.0322570800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 358.9247741699219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 359.0638427734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 359.12506103515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.93597412109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.96942138671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 358.9469909667969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 359.03875732421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 358.86639404296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 358.9547119140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 358.9269714355469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 358.81939697265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 358.8366394042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 359.0331115722656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 358.85235595703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 358.87628173828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 359.0799865722656
############ Running episode number: 863  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.95538330078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 358.9554138183594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 359.00799560546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 358.9898986816406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 359.1292419433594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 358.9137878417969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 359.12921142578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 358.9611511230469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 358.8902587890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 358.8106384277344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 358.7412109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 358.84844970703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 359.0551452636719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 359.006103515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 359.00653076171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 358.947021484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 359.07080078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 359.1614074707031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 358.8843688964844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 358.99578857421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 358.9687805175781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 358.9736022949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 358.9418029785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 358.86700439453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 358.7993469238281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 358.87982177734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 359.0343322753906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 358.9173889160156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 358.9517822265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 359.09283447265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 358.93243408203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 358.91162109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 358.8840637207031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 358.86016845703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 359.2442321777344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 358.9871520996094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 358.8851318359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 359.03369140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 359.1365661621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 358.9708557128906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 358.7397155761719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 359.08050537109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 358.9261474609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 358.8849182128906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 358.931396484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 359.0469055175781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 358.9422912597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 358.9918212890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 358.97503662109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 358.8958435058594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 358.9290466308594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 359.1946716308594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 359.1261291503906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 358.9457092285156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 358.7953186035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 358.7940673828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 358.6471862792969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 359.1785583496094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 359.1321105957031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 358.8721923828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 359.1101379394531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 358.92840576171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 359.06585693359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 358.7330322265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 359.06268310546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 358.7830810546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 358.8487548828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 359.0181579589844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 358.736572265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 359.06427001953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 359.0188903808594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 359.0433349609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 358.875244140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 358.7376708984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 359.0352783203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 358.90911865234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 358.9311218261719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 358.9615478515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 359.0274658203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 358.7344665527344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 358.86663818359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 359.03802490234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 359.0411071777344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 358.6603088378906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 359.085205078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 359.0027770996094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 359.13330078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.78253173828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.91729736328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 359.0728454589844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 358.87396240234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 358.7423095703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 358.8313903808594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 358.968505859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 358.8690490722656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 359.0533447265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 358.6372375488281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 358.8495178222656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 359.0021667480469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 358.84893798828125
############ Running episode number: 864  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.910888671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 359.12799072265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 358.97015380859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 358.8329162597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 358.9676513671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 358.90869140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 358.8292236328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 359.0943298339844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 358.81280517578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 358.8548583984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 359.0533142089844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 358.9740295410156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 358.9566345214844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 359.1022644042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 358.9054870605469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 359.01416015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 359.1507568359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 359.14678955078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 358.8822937011719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 358.9112854003906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 358.8568420410156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 359.1275634765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 358.9782409667969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 359.106689453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 358.8887023925781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 358.83599853515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 358.9454345703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 358.9823303222656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 359.0116271972656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 359.0558166503906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 359.0432434082031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 358.96319580078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 359.033935546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 358.90740966796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 358.90380859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 358.95721435546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 359.08612060546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.8650207519531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 359.1606140136719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 358.9383850097656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 358.991943359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 359.04168701171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 358.89599609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 358.830810546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 359.0111389160156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 358.80426025390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 358.8349914550781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 359.1956481933594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 359.2387390136719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 358.8839416503906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 358.9559631347656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 359.0311584472656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 359.04620361328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 359.1605529785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 359.03955078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 358.9436340332031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 358.8025207519531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 358.94354248046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 358.841796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 358.8616638183594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 358.80059814453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 358.79327392578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 359.2913818359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 358.9921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 358.94256591796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 359.0444030761719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 358.85137939453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 358.7446594238281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 358.8486328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 359.0274963378906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 359.05859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 358.9515075683594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 359.1470642089844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 358.7950439453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 358.9072570800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 359.07281494140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 358.89208984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 358.9566345214844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.9860534667969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 358.895751953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 358.9366455078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 359.1188659667969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 359.0279846191406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 358.87652587890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 359.07012939453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.6611633300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 358.9091796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.7545471191406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 359.02777099609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 358.80731201171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 358.90179443359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 358.848876953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 358.79864501953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 359.11334228515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 358.8359680175781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 358.9717102050781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 358.884033203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 359.00738525390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 359.0173645019531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 358.7380065917969
############ Running episode number: 865  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 359.0231018066406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 358.92315673828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 359.17047119140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 359.1119079589844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 358.8922424316406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 358.84210205078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 358.86614990234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 358.94476318359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 358.7840881347656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 358.8173522949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 358.754150390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 358.8983459472656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 358.7932434082031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.70526123046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 358.9852600097656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 359.14178466796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 358.9549255371094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 358.7513122558594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 359.1557312011719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 359.11968994140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 358.84832763671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 358.9071044921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 358.8785095214844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 358.85400390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 358.8017272949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 359.08636474609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 358.8700256347656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 358.8130187988281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 358.79669189453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 358.8370361328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 359.06683349609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 358.9903259277344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 359.1137390136719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 358.86187744140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 359.0748596191406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 358.81494140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 358.8423156738281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.81158447265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 359.1258850097656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 358.97027587890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 358.8912353515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 358.9115295410156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 358.9624328613281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 359.0508117675781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 358.9594421386719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 358.8587341308594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 358.8660888671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 358.91729736328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 358.750244140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 359.06890869140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 358.9154968261719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 358.7838439941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 359.2757263183594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 358.97540283203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 359.0611572265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 359.12994384765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 359.28570556640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 358.9705810546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 358.97918701171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 359.1455078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 358.95538330078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 359.1427917480469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 358.89312744140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 358.9779357910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 359.0782165527344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 358.86712646484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 359.00830078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 359.0157165527344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 358.8883056640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 358.8625793457031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 358.9510498046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 358.90496826171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 359.0691833496094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 358.8816223144531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 358.68084716796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 359.0165100097656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 358.8524169921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 358.9933166503906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.69879150390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 358.7979736328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 359.0072021484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 358.9914245605469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 359.1351013183594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 358.88848876953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 358.8849182128906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.9830627441406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 359.1443176269531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 359.0553894042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 359.09710693359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 358.8393249511719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 358.95355224609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 359.0508728027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 359.05810546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 358.97674560546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 358.7622985839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 358.97509765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 358.9775695800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 359.05096435546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 358.9642639160156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 359.0697937011719
############ Running episode number: 866  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.866455078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 358.8389892578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 358.7749938964844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 358.9265441894531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 358.9748840332031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 359.0882568359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 358.9129638671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 358.637939453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 358.58038330078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 358.98272705078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 358.9472351074219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 359.1468200683594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 358.9431457519531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 359.05230712890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 358.89990234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 358.74957275390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 359.0137939453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 358.76971435546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 359.0682373046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 359.0907287597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 358.8642883300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 358.9743957519531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 359.1158752441406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 359.0347900390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 358.84820556640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 358.8190002441406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 358.93682861328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 358.8692626953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 359.1361999511719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 358.84173583984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 359.10809326171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 358.85675048828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 358.9804382324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 359.0444030761719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 359.14019775390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 358.8706970214844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 359.10302734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.748779296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 358.90887451171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 358.85528564453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 359.0939025878906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 358.9920349121094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 359.0926513671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 358.8956298828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 358.9655456542969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 358.7904052734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 358.8760986328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 359.1414794921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 358.9895324707031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 358.9501647949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 359.0485534667969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 358.9109802246094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 358.93994140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 359.0494384765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 359.0412292480469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 359.0193786621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 358.9353332519531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 358.9888916015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 358.9405212402344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 358.9510192871094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 359.0191955566406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 359.1044921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 358.9721374511719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 358.9019470214844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 358.833740234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 359.0726318359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 359.11029052734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 358.6220397949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 359.1322326660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 358.8180847167969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 359.0506896972656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 359.2069091796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 359.15814208984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 359.07513427734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 358.8608093261719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 358.82525634765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 359.1623229980469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 358.94476318359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.9106140136719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 358.92498779296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 358.8583984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 358.89141845703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 359.0787353515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 358.95074462890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 359.0984191894531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 359.2384033203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 358.8758544921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.9135437011719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 359.0284729003906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 358.8338317871094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 358.9515686035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 358.850341796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 358.9548645019531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 358.8856201171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 358.8177795410156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 359.0677185058594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 358.9743347167969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 359.1239013671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 358.9760437011719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 358.8688049316406
############ Running episode number: 867  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.95965576171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 359.1246032714844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 358.8444519042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 358.8890380859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 359.03509521484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 359.0027160644531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 359.05523681640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 358.95294189453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 358.8858642578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 358.8092041015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 358.80865478515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 358.78851318359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 358.9590148925781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 359.00897216796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 359.1365051269531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 359.2535400390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 359.0137634277344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 359.0858459472656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 358.9443054199219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 358.83868408203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 358.9816589355469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 358.95867919921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 358.8854064941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 358.8722839355469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 358.951416015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 359.0115661621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 359.2430114746094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 359.0892333984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 358.7695617675781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 359.0855712890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 358.9860534667969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 358.94049072265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 358.8273620605469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 359.0890808105469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 359.04949951171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 359.1230163574219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 358.9002990722656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 359.0909729003906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 359.1398620605469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 358.88653564453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 358.8159484863281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 359.0003967285156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 358.78155517578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 359.0870666503906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 358.99298095703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 358.78009033203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 358.9644470214844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 358.8768310546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 358.6706848144531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 358.97760009765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 358.8949279785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 359.06414794921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 359.076171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 359.1598815917969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 358.9260559082031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 359.281494140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 358.7293701171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 358.82940673828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 359.2152404785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 358.8211669921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 358.8658447265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 358.880859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 359.0640563964844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 358.6704406738281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 358.88787841796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 358.91168212890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 359.1542663574219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 358.8567810058594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 358.8822021484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 358.9009094238281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 358.968505859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 358.9813537597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 358.8188781738281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 359.0125732421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 358.9892883300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 359.0716857910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 359.217529296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 359.0133361816406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.9596862792969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 358.91278076171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 358.860595703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 359.1703796386719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 358.86676025390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 358.9732971191406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 359.0986328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 359.00390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 359.0262756347656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.9220886230469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 359.178955078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 359.21905517578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 359.03131103515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 358.9829406738281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 359.0985412597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 359.25482177734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 358.7970275878906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 358.9012756347656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 358.82958984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 358.76788330078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 358.7727966308594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 358.8665466308594
############ Running episode number: 868  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.8092346191406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 359.0408935546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 358.9419250488281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 359.1127014160156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 358.8399353027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 358.720947265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 358.95672607421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 359.0036926269531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 358.9486389160156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 358.9079895019531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 358.8489990234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 358.90234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 359.0298156738281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.9743957519531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 359.00543212890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 359.0430603027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 358.98626708984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 359.0661926269531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 359.015869140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 359.0717468261719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 358.8828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 358.9172668457031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 358.9184265136719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 358.7946472167969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 359.005126953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 359.02642822265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 358.8669128417969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 359.03228759765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 358.8416442871094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 359.0345458984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 358.8792724609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 359.1404724121094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 358.882568359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 358.8307800292969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 358.7875671386719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 359.092041015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 358.96240234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 359.0220947265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 358.94207763671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 359.14715576171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 359.0503234863281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 359.1328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 359.0768127441406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 359.05133056640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 358.9814453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 358.9533386230469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 358.8648376464844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 358.8134460449219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 358.7625427246094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 358.9026794433594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 359.08343505859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 358.97967529296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 359.04022216796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 358.8389892578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 358.939453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 358.98388671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 359.10516357421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 358.9947509765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 358.9844970703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 359.0201416015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 359.087158203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 359.0741882324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 358.86053466796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 358.67620849609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 359.0148620605469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 359.10723876953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 358.97967529296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 358.8881530761719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 359.1532287597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 359.0830993652344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 359.02191162109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 358.92449951171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 359.1383361816406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 358.88116455078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 358.87615966796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 359.0927429199219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 359.1607971191406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 359.14544677734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.864990234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 359.04583740234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 358.9661865234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 358.93927001953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 358.87628173828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 358.9012451171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 358.810546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 359.01971435546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 359.172607421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 359.0208740234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.9405212402344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 358.86968994140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 358.9313049316406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 358.982177734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 358.72967529296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 358.9273376464844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 358.98931884765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 359.0896301269531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 359.0813293457031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 358.9617614746094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 358.9892272949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 358.8546447753906
############ Running episode number: 869  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.8062438964844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 358.9339904785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 359.2244567871094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 358.8575439453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 358.9479064941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 358.90496826171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 359.0551452636719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 358.8263854980469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 359.0934143066406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 358.9888610839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 358.93841552734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 358.8916015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 358.8499755859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.8257751464844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 359.07659912109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 359.1191711425781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 359.0224914550781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 359.0578308105469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 358.9391174316406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 358.9678649902344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 359.0640563964844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 359.04364013671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 358.92193603515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 359.0748291015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 359.065185546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 359.0450134277344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 358.8269958496094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 358.88153076171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 358.8104248046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 359.01953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 359.0248107910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 358.8104553222656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 358.8775329589844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 358.8453063964844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 358.9784240722656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 359.10919189453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 359.12322998046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 359.04742431640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 358.69561767578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 359.0933532714844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 358.9765930175781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 358.7311096191406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 358.87274169921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 359.0334777832031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 358.697021484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 359.0447082519531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 358.8506774902344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 359.0002746582031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 359.22784423828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 358.9867248535156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 358.99920654296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 358.8390197753906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 358.9932556152344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 358.7524108886719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 358.9792785644531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 359.1080322265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 358.85040283203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 359.11846923828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 359.0415344238281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 359.0933837890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 359.03533935546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 358.8466796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 359.17083740234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 359.0832824707031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 359.0861511230469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 358.8680114746094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 359.1378173828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 359.011962890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 358.9028625488281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 358.84173583984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 358.8076171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 358.9503479003906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 359.08746337890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 358.96905517578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 359.0543518066406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 359.0158386230469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 358.9685363769531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 358.9148864746094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 359.03326416015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 359.1008605957031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 358.8558654785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 359.1797180175781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 359.0267639160156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 359.0913391113281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 358.8609924316406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.95074462890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 359.0251159667969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.88018798828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.9969482421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 358.7601623535156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 358.91973876953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 359.0980224609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 358.7546691894531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 359.0476379394531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 358.908203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 359.0494384765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 359.08013916015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 358.885009765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 359.13751220703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 359.04010009765625
############ Running episode number: 870  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 359.0217590332031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 358.84979248046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 358.8022766113281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 358.95733642578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 358.9560852050781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 358.9743957519531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 358.8564453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 358.9236755371094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 359.1357421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 358.9991760253906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 359.0902099609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 358.9833984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 358.80804443359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 359.0961608886719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 358.9197692871094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 358.89776611328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 358.9059753417969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 359.2135009765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 359.0090026855469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 359.04833984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 358.98956298828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 359.0806884765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 358.89013671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 359.1830139160156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 358.9252014160156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 358.9073181152344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 358.90655517578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 358.9847106933594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 358.9654846191406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 358.7907409667969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 359.0096435546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 359.01531982421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 358.94122314453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 359.11248779296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 359.0251770019531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 359.1932067871094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 359.1702880859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.7703857421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 358.9141845703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 359.1654052734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 358.7674865722656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 358.8315124511719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 358.923828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 358.8946533203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 359.065673828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 358.9696960449219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 359.01531982421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 359.0590515136719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 358.8169860839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 358.94891357421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 359.0545959472656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 359.1034240722656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 358.9703369140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 358.8322448730469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 358.7950439453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 359.20361328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 359.1767272949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 358.82586669921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 358.9654541015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 358.96063232421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 359.15045166015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 358.96630859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 359.0554504394531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 358.9412841796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 358.84503173828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 358.9310607910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 359.0304870605469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 358.9171142578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 358.9970703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 359.1144104003906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 358.8497009277344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 358.8792724609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 358.92425537109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 359.01788330078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 359.02288818359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 359.20062255859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 359.0513610839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 359.2684631347656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 359.19488525390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 359.02239990234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 358.89892578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 359.0455017089844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 358.707275390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 358.9730529785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 359.00872802734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.94647216796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 359.125244140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.9026794433594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.8791198730469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 358.9220886230469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 359.0784606933594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 358.8586120605469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 358.8277893066406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 359.04522705078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 359.1467590332031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 358.95654296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 358.8271484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 358.6985778808594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 358.955322265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 359.1480407714844
############ Running episode number: 871  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.9449462890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 359.0423889160156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 358.9364013671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 359.01141357421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 358.8150939941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 358.7716369628906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 358.9700012207031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 358.9313049316406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 359.1355895996094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 358.87493896484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 359.0287170410156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 358.9637756347656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 359.15972900390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.9106750488281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 358.80792236328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 358.89324951171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 358.9914245605469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 359.11224365234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 359.0415344238281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 358.9589538574219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 358.71661376953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 359.0793151855469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 359.1435241699219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 358.9908142089844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 358.9974060058594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 358.8309631347656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 358.9126281738281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 358.89306640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 358.912353515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 358.8411865234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 358.8701171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 358.7806701660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 359.03167724609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 358.8176574707031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 359.0006103515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 358.9094543457031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 358.966796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.8570251464844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 359.046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 359.13653564453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 358.9626770019531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 359.1380310058594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 358.91351318359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 359.08935546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 359.0677490234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 359.02618408203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 358.95233154296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 358.9679260253906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 359.12359619140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 358.8407287597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 359.2102966308594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 358.95440673828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 358.90863037109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 358.884033203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 358.85101318359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 359.0162353515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 359.0721740722656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 358.97021484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 359.21746826171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 358.9225769042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 358.95562744140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 358.8272399902344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 359.06939697265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 359.0535583496094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 359.0734558105469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 359.1357421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 358.8543701171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 358.9013671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 358.9803161621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 359.1505126953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 359.1211853027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 358.7966003417969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 359.05560302734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 358.8260192871094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 358.90008544921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 359.1098327636719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 358.95025634765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 358.87884521484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 359.0744323730469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 358.72821044921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 359.00634765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 358.9384765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 358.96563720703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 358.6895751953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 358.87884521484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.9014892578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 359.0118713378906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.9537048339844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.88067626953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 359.101318359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 358.8870544433594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 359.2320251464844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 358.9674072265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 359.00360107421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 358.8977966308594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 358.92010498046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 358.7857360839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 358.7508850097656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 358.8599548339844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 358.90887451171875
############ Running episode number: 872  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.7517395019531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 359.13043212890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 359.0294494628906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 359.02215576171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 358.8154602050781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 358.8785705566406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 358.90087890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 359.0635070800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 359.0574035644531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 359.05517578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 358.96368408203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 359.0067443847656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 358.8472595214844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.7572937011719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 359.0066833496094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 358.959716796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 359.0128479003906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 359.038818359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 358.9533386230469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 359.0125732421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 359.0130920410156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 358.84246826171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 359.1022033691406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 359.0657043457031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 359.0701904296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 358.97320556640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 358.823974609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 358.7124328613281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 359.18194580078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 358.9177551269531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 358.8562927246094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 359.1075134277344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 358.6392517089844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 358.8805236816406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 358.9874267578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 359.0065612792969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 359.033447265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.91973876953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 358.7662658691406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 359.0204162597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 358.92724609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 358.8348083496094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 358.9222412109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 358.81488037109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 358.7791442871094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 359.0013732910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 359.0509338378906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 358.8960876464844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 359.0167541503906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 359.2165832519531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 358.910888671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 358.75244140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 358.95306396484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 359.1511535644531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 359.04345703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 359.0854187011719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 358.75372314453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 359.07342529296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 358.9993896484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 358.94732666015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 358.89959716796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 358.9958801269531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 359.176513671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 359.00787353515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 358.7477722167969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 358.88238525390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 358.97735595703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 358.8119201660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 358.9859313964844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 358.80828857421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 359.0187072753906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 359.01544189453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 358.8670654296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 358.87762451171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 359.1854553222656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 359.1888732910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 358.8199462890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 359.0802307128906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.8916320800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 358.9320068359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 358.7875671386719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 358.84326171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 358.60809326171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 359.1795654296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 358.7958679199219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.985595703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 358.9564208984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 359.0195617675781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.8910217285156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 358.9198303222656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 358.935546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 358.81658935546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 358.96942138671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 358.8343811035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 359.033203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 358.9456481933594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 359.0157165527344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 359.0289611816406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 358.9468078613281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 359.0424499511719
############ Running episode number: 873  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.7884216308594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 358.9507141113281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 359.0646667480469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 359.09625244140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 358.9918212890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 359.0047912597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 359.1505432128906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 358.9031982421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 359.21978759765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 358.857421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 359.0538330078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 358.9580078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 359.086181640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.8711242675781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 358.916259765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 359.0548400878906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 358.9226379394531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 359.07275390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 359.06036376953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 358.6419372558594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 358.9230651855469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 359.0751037597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 359.03369140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 358.88934326171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 359.10809326171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 358.79437255859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 358.8487243652344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 358.79443359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 359.0350341796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 358.9769287109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 358.82958984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 358.90374755859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 359.1246643066406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 358.85198974609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 358.9924011230469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 359.1387634277344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 358.79345703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.74383544921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 358.9522705078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 358.89447021484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 358.8760070800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 358.81939697265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 359.1404113769531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 359.0653381347656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 358.992431640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 358.8643493652344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 359.0980224609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 358.8527526855469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 358.92388916015625
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 1 20.0 992.681522335 (12.19918626616789, 10)
loss 358.91949462890625
Current State,action,reward,Response time,Next State:  (10, 12.19918626616789) 3 19.0 1024.75516863 (12.501496275411796, 11)
loss 359.0440673828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 358.76934814453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 358.91339111328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 358.8483581542969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 358.9317932128906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 358.99322509765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 358.80755615234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 358.9151306152344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 359.00360107421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 358.9357604980469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 359.0174865722656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 358.8917236328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 358.7145080566406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 359.3875732421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 359.1842346191406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 358.904541015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 359.09674072265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 359.2147521972656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 359.0155944824219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 358.9606628417969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 358.9584655761719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 358.902099609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 358.9073181152344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 359.18292236328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 358.6375427246094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 359.2922058105469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 358.8884582519531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 358.9092712402344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.8765869140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 359.1075439453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 358.88916015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 359.2090759277344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 359.2490234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 358.9829406738281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 359.03948974609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.98199462890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 358.826416015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.91009521484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 359.1282043457031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 359.0943908691406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 358.9569396972656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 359.4947204589844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 358.8902282714844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 359.0694885253906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 359.05938720703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 358.9690246582031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 359.0418395996094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 358.74188232421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 358.9169616699219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 358.84368896484375
############ Running episode number: 874  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.9527282714844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 358.63555908203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 358.9197082519531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 359.3545837402344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 358.9510498046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 358.9964904785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 358.8935241699219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 358.8267822265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 359.00054931640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 359.1869201660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 359.23602294921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 359.05657958984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 359.0242919921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.9798889160156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 358.8597412109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 359.3721618652344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 358.60943603515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 359.4100646972656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 358.9795227050781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 358.9700012207031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 359.0113525390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 358.8599548339844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 359.1014404296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 358.8974914550781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 359.0929260253906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 358.9180908203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 359.0782775878906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 358.98712158203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 358.9921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 359.0679016113281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 358.82220458984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 358.7828063964844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 359.0789489746094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 358.94720458984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 358.9378356933594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 358.857421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 358.7875061035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 359.0380554199219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 358.90557861328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 358.92767333984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 358.9660949707031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 358.9115295410156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 358.5869140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 359.0011291503906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 359.0242004394531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 359.24371337890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 358.91253662109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 358.9862365722656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 358.9209899902344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 359.001220703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 358.9400634765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 359.1518249511719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 358.9137268066406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 358.9800720214844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 358.8802795410156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 358.76800537109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 358.8692626953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 358.9135437011719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 358.8309326171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 358.8623962402344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 358.9936828613281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 358.9533996582031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 359.11383056640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 358.8775634765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 359.0481872558594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 358.85931396484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 358.9666748046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 359.0343933105469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 359.03509521484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 358.8779602050781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 359.02996826171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 359.085693359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 359.1077880859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 358.9154357910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 359.02081298828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 358.9421691894531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 358.8121337890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 358.9137268066406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 359.0020751953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 358.7822570800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 359.07806396484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 358.8138427734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 359.1781311035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 358.9178161621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 359.02996826171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 359.0152587890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 358.9492492675781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.95806884765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.8404235839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 358.8175354003906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 359.0523376464844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 358.8941650390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 358.9232482910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 359.0272521972656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 359.4881286621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 359.0379333496094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 359.18475341796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 358.96820068359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 358.9334411621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 358.9333190917969
############ Running episode number: 875  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.78814697265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 359.13616943359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 359.05853271484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 358.7860412597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 359.3282470703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 358.8153991699219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 358.895751953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 358.7204895019531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 359.0269775390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 359.0315856933594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 358.47784423828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 358.967041015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 358.92291259765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.9597473144531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 358.7003479003906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 359.0855712890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 359.1252136230469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 358.7784423828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 359.1037902832031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 358.70562744140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 358.91424560546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 358.8523254394531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 358.8829650878906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 359.1569519042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 358.9156188964844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 359.16485595703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 359.01019287109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 358.90264892578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 359.03277587890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 358.7866516113281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 359.65118408203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 359.0713195800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 359.2938232421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 358.77557373046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 358.95526123046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 358.9383544921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 358.841796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 359.0063781738281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 359.3974609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 359.044677734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 358.9352111816406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 358.9270324707031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 358.8575744628906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 358.837646484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 358.86456298828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 359.02655029296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 359.1092224121094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 358.8314514160156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 358.9122009277344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 358.9443054199219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 358.890869140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 358.79266357421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 358.94415283203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 359.0263671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 358.9921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 358.8488464355469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 358.9800720214844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 358.83172607421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 359.1790466308594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 358.9932556152344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 358.8391418457031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 359.3136901855469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 358.9778137207031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 359.09576416015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 359.08587646484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 359.33795166015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 358.7265930175781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 358.9532775878906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 358.9318542480469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 358.8134765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 358.9134521484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 359.0714111328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 358.9042663574219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 358.8734130859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 359.0235595703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 359.0858154296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 359.1323547363281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 359.0274963378906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 359.0972900390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 359.232421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 358.67236328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 358.8341979980469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 359.0437316894531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 358.7032470703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 358.90069580078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.9847106933594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 358.9235534667969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.9280700683594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.9214782714844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 358.9284362792969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 358.9679870605469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 359.12396240234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 358.6603698730469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 359.4786682128906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 358.8249206542969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 359.48431396484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 358.83526611328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 359.2759094238281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 359.6518859863281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 358.9100646972656
############ Running episode number: 876  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 359.0433654785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 358.8748779296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 359.0591125488281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 358.87713623046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 359.0732116699219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 358.959716796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 359.02252197265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 358.7003479003906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 358.9521789550781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 359.0200500488281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 358.9831237792969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 359.0400390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 358.8604431152344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.83099365234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 359.2660217285156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 359.0980224609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 359.14105224609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 358.89691162109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 358.6952819824219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 359.44610595703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 358.9208984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 358.9435119628906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 358.9704284667969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 359.0451354980469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 358.7408752441406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 358.9604187011719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 359.0271911621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 358.78814697265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 359.08795166015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 358.82574462890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 359.283447265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 358.98748779296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 358.8974304199219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 359.0544738769531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 358.8863830566406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 359.3968505859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 359.0136413574219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 359.2894592285156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 359.2904968261719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 358.93133544921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 358.90069580078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 359.1781311035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 358.89678955078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 359.16351318359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 358.932861328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 358.8868408203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 358.9600830078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 359.1425476074219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 359.0500183105469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 358.8953552246094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 358.9478454589844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 358.84417724609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 358.8258972167969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 359.1440734863281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 359.0869140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 358.90374755859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 358.86895751953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 358.9523010253906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 359.1907958984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 358.5917663574219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 358.9421081542969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 358.9991760253906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 359.4794616699219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 358.8876037597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 358.8765869140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 359.2469482421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 358.9811706542969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 358.8890075683594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 358.7604064941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 358.8141784667969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 359.01715087890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 358.81768798828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 358.6197509765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 359.0140380859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 358.76959228515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 358.9189453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 359.07354736328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 358.92132568359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.8043518066406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 359.0317687988281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 358.9214782714844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 358.8994445800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 358.8819885253906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 359.1468200683594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 358.948974609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.819580078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 359.0088806152344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 359.1346435546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.9336242675781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 358.8226318359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 358.9877014160156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 359.09832763671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 358.8364562988281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 359.07452392578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 359.0027770996094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 359.0024108886719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 358.9255065917969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 358.829345703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 359.061279296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 358.7711486816406
############ Running episode number: 877  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.7342224121094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 359.34356689453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 358.9696350097656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 358.892333984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 358.747802734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 358.8727722167969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 359.0757141113281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 359.1196594238281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 358.87847900390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 359.0704345703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 359.2580871582031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 359.1276550292969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 358.8268737792969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.8937683105469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 358.8265075683594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 359.4459533691406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 358.9359130859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 359.1800842285156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 358.871337890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 358.7582092285156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 359.1162109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 358.90130615234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 359.6792297363281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 359.1090087890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 359.0500793457031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 358.4876403808594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 358.8849792480469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 359.0192565917969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 358.9728088378906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 359.03009033203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 359.1089782714844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 358.9683837890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 359.5190734863281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 358.8512878417969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 358.9910888671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 358.9599304199219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 358.923583984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.949462890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 358.718994140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 359.38177490234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 358.92449951171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 359.0982971191406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 358.883056640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 358.94189453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 359.6645812988281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 358.8450012207031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 358.94915771484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 358.7945861816406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 359.0682678222656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 358.9685974121094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 359.0035095214844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 359.6521911621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 358.9880676269531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 358.8432312011719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 358.92425537109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 359.1661376953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 359.0176086425781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 358.7213134765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 358.8335266113281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 358.900390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 359.1862487792969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 359.00604248046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 358.8648681640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 358.62255859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 358.80364990234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 358.96881103515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 359.3951110839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 358.9465637207031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 358.9977722167969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 359.042236328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 358.8989562988281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 359.00897216796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 358.9422912597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 358.9663391113281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 358.7380065917969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 358.8014831542969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 358.92529296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 358.95220947265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.8684997558594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 359.1049499511719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 359.1451110839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 358.67950439453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 358.90777587890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 358.89208984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 358.81591796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.9681091308594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 359.2346496582031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.8856506347656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.6893310546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 358.86322021484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 359.2427062988281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 358.82171630859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 358.8380432128906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 359.00787353515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 359.0434875488281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 358.9892578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 359.1211853027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 359.2635803222656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 358.95880126953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 358.9616394042969
############ Running episode number: 878  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.9291687011719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 358.96044921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 359.1947326660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 358.97772216796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 359.3101501464844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 359.05145263671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 359.0765380859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 358.9505310058594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 359.27691650390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 359.10394287109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 359.3091125488281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 359.6193542480469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 359.0166015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.7900085449219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 358.7967529296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 359.0764465332031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 358.83001708984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 358.80926513671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 358.9873046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 358.9812927246094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 359.0916442871094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 358.9599609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 359.0
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 359.0360107421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 359.02362060546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 359.2785949707031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 359.0162658691406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 359.1021423339844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 358.9801940917969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 359.12908935546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 359.0190734863281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 358.9851989746094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 358.8702392578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 358.88702392578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 358.9947814941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 358.9219055175781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 358.9797058105469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.9119567871094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 359.09381103515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 359.5896301269531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 358.94677734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 358.73675537109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 358.7995300292969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 358.86944580078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 358.8922424316406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 358.95654296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 358.7198486328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 358.7837829589844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 358.8052673339844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 358.9708251953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 359.11041259765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 358.8789978027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 358.6967468261719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 359.4775695800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 358.9209289550781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 358.7797546386719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 358.8067321777344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 358.8765563964844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 358.83416748046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 358.8663330078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 359.13238525390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 358.836669921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 358.937255859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 359.55438232421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 358.9556884765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 358.7494201660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 358.7549133300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 359.0425720214844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 358.8970031738281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 359.3749084472656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 358.859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 358.5919189453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 359.2001037597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 359.3127136230469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 359.01043701171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 359.1308898925781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 358.9735107421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 359.0000915527344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.9040832519531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 358.84222412109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 359.04644775390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 359.0792541503906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 358.8854675292969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 358.99261474609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 359.0999755859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 359.0426025390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 358.96612548828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.90960693359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 359.0849304199219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 358.990234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 359.17803955078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 358.7680358886719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 358.95477294921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 358.97393798828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 358.8888244628906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 358.6941223144531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 358.9382629394531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 359.0830078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 358.99737548828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 358.87213134765625
############ Running episode number: 879  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 359.09326171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 359.1363830566406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 358.8793029785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 358.9927978515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 358.6413269042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 359.3717346191406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 358.9122314453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 359.0142517089844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 359.3729248046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 358.85186767578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 359.0390930175781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 358.857421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 358.9756774902344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.75390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 358.99456787109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 358.7611389160156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 358.8830871582031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 358.8023376464844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 359.14117431640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 359.337158203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 359.01861572265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 358.96636962890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 358.8385009765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 359.1830749511719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 359.0023193359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 359.07110595703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 358.8540344238281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 359.39691162109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 358.8418273925781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 358.8624572753906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 358.923095703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 358.7939758300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 358.8686218261719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 358.672607421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 358.9366760253906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 358.9318542480469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 358.876953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 359.12896728515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 359.25921630859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 358.9624328613281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 358.951904296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 358.74945068359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 359.19854736328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 358.9660339355469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 359.0860595703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 359.4112854003906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 358.8910827636719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 359.144287109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 358.8255920410156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 359.0065002441406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 358.9703674316406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 359.0397644042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 359.09478759765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 358.6642150878906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 358.9467468261719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 359.34564208984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 358.8650207519531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 358.947998046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 359.16888427734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 358.9066162109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 359.4059143066406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 358.7934265136719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 359.0245056152344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 359.4471130371094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 359.0130310058594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 359.3946533203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 358.8911437988281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 358.9341125488281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 358.84124755859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 358.88909912109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 358.8155517578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 358.9888916015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 359.2239990234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 358.9866638183594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 359.1330871582031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 358.8079833984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 358.98736572265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 358.8479919433594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.9295959472656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 358.9358215332031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 358.94329833984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 358.9327697753906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 359.1039733886719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 359.0372009277344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 359.0130310058594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.8182067871094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 358.868408203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.89794921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 359.0567932128906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 359.3471374511719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 358.97027587890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 359.1089172363281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 358.7776794433594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 358.9588623046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 359.1046447753906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 359.3408508300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 358.9779357910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 358.9383239746094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 358.9815368652344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 358.9717102050781
############ Running episode number: 880  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.7962646484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 358.918212890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 358.6028137207031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 359.2281799316406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 358.9789123535156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 359.0255432128906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 358.95166015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 358.9617614746094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 359.11083984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 359.2423400878906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 359.00994873046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 358.99578857421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 359.08160400390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.793212890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 358.9421081542969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 359.0915222167969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 358.9598083496094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 358.9335021972656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 358.94512939453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 358.6248779296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 358.9803161621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 358.9837951660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 358.8733215332031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 358.826416015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 358.9823303222656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 358.9422607421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 359.1181640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 358.9819641113281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 358.9002685546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 359.0141296386719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 359.0252990722656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 358.9429626464844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 358.7215270996094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 359.4120178222656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 359.2891540527344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 358.8764953613281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 359.11419677734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.8320007324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 358.77130126953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 358.91961669921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 358.8778991699219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 359.2170715332031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 358.8465576171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 358.90484619140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 358.8574523925781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 359.287353515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 358.8988342285156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 358.9899597167969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 359.04058837890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 359.00543212890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 358.9027099609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 359.13128662109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 358.73175048828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 358.79827880859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 359.0982360839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 358.78680419921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 358.83099365234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 358.9129638671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 359.10833740234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 358.9368896484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 359.0296936035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 359.05694580078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 359.2029113769531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 359.10784912109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 358.8277282714844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 358.8664245605469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 359.09710693359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 358.8541259765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 359.05010986328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 359.13531494140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 358.7835998535156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 359.0791931152344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 359.01751708984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 359.0916442871094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 359.1060485839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 358.8367919921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 359.1086120605469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 358.66265869140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.996826171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 358.81829833984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 358.86492919921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 359.0139465332031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 358.88995361328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 358.97247314453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 358.9112854003906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.9933166503906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 358.8265075683594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 359.34246826171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.74810791015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 358.89520263671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 358.8567810058594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 358.98919677734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 358.92950439453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 359.04376220703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 358.8516845703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 358.8684997558594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 359.2330017089844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 359.0410461425781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 359.03167724609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 358.6654357910156
############ Running episode number: 881  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.813720703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 359.0646667480469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 359.2029724121094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 359.1114501953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 358.75726318359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 358.841796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 358.82684326171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 359.1297912597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 359.12872314453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 359.0763854980469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 359.16754150390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 359.3111267089844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 358.9657897949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.9316101074219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 359.0783996582031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 358.94384765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 359.3349609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 358.85418701171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 359.2060852050781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 358.9367370605469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 358.841552734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 358.7582702636719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 359.2266540527344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 359.0210266113281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 359.251220703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 358.97412109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 359.13470458984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 358.8384094238281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 358.9675598144531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 359.0838928222656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 358.70477294921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 358.81219482421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 359.03680419921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 358.8533935546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 359.09320068359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 358.8399353027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 359.07568359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.9545593261719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 359.05078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 358.9125671386719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 358.9580383300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 359.099853515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 358.8020324707031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 358.9247131347656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 359.210205078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 358.84454345703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 358.87969970703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 359.1081848144531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 358.94000244140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 358.9215087890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 359.0034484863281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 359.03778076171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 359.0343933105469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 358.8600158691406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 359.56976318359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 359.4971618652344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 359.11553955078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 358.9670715332031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 358.915283203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 359.0087890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 359.10150146484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 359.1062316894531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 359.05572509765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 359.22003173828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 358.7471008300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 358.7161865234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 359.0610046386719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 358.726318359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 359.06683349609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 359.0771179199219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 358.8182373046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 359.1263427734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 359.0658874511719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 358.8011474609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 358.9938049316406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 358.78289794921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 358.9971008300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 358.92572021484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 359.1063232421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 358.7968444824219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 358.9394226074219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 359.26007080078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 358.8550720214844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 358.7060241699219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 358.7949523925781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.9188537597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 359.11865234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.7489929199219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 359.1009216308594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 358.9071044921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 359.11651611328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 359.51666259765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 359.0543518066406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 358.9463806152344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 359.0915832519531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 358.7629089355469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 359.08587646484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 359.0118103027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 358.95843505859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 359.05267333984375
############ Running episode number: 882  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 359.0087890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 359.0169372558594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 359.0378112792969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 358.9684143066406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 358.888916015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 359.0303039550781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 359.3908386230469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 358.8112487792969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 359.0003356933594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 358.82049560546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 358.7919921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 358.9718933105469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 358.8337097167969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.8614501953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 359.0726318359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 358.8173522949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 358.6714782714844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 358.9534606933594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 358.814453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 358.8534240722656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 358.9292907714844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 358.7315979003906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 359.0256652832031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 358.7383117675781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 359.0309753417969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 358.8876037597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 359.041015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 358.8614196777344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 358.8943786621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 359.0008239746094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 359.1383056640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 358.80230712890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 358.91217041015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 359.1174621582031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 359.0521545410156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 359.2557067871094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 358.9925842285156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.9438171386719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 358.9222106933594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 359.0681457519531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 358.9082336425781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 358.94384765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 358.9261474609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 358.9001770019531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 359.0384826660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 359.2071228027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 358.9205322265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 359.0460205078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 359.0353088378906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 358.9593811035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 359.12872314453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 359.0605163574219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 359.3704833984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 358.98040771484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 359.3128662109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 359.059814453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 358.99261474609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 358.6393737792969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 359.0685729980469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 358.9906311035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 358.6927185058594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 358.8076171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 358.9811706542969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 359.0518798828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 359.11029052734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 359.3376159667969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 358.761474609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 359.09783935546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 358.9530334472656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 358.93988037109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 359.0357666015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 359.16754150390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 359.1645812988281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 358.9915466308594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 358.97271728515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 358.7555236816406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 359.09149169921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 358.9344177246094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.8297424316406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 358.9994201660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 359.13677978515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 359.03375244140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 359.220947265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 358.9144592285156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 358.89398193359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.8699035644531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 358.944091796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 359.0282897949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 359.0705871582031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 359.3389892578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 358.861083984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 358.92401123046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 359.116455078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 359.09442138671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 359.0375671386719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 358.7959289550781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 358.9932861328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 358.781005859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 359.1256103515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 358.9792175292969
############ Running episode number: 883  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.9145812988281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 359.4512634277344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 359.0654296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 358.7839660644531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 359.0982360839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 358.7591247558594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 358.8121337890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 358.99462890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 358.6675720214844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 358.9033203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 358.9966125488281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 358.7842102050781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 358.9904479980469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 359.0422058105469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 359.500244140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 358.907470703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 359.03936767578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 359.0474853515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 359.06817626953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 358.91973876953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 358.8466491699219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 359.0443115234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 358.8453369140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 359.0142517089844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 359.1335754394531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 358.9034118652344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 358.9616394042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 358.8851318359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 359.0091247558594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 359.1007995605469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 359.26123046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 359.38140869140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 358.9311218261719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 359.137451171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 359.0435485839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 358.9229431152344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 358.9279479980469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.6295166015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 358.84429931640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 358.88372802734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 359.13629150390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 358.96502685546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 358.8678283691406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 359.4419250488281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 359.0174865722656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 359.146240234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 359.0140686035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 358.896484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 358.7149353027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 359.0659484863281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 358.901123046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 359.03924560546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 358.9637451171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 359.0204772949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 359.1097412109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 358.95745849609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 359.2057189941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 358.8255310058594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 358.7946472167969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 359.0380554199219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 358.7550964355469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 358.9336242675781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 358.8855285644531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 359.04132080078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 358.99859619140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 358.8691711425781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 358.6871337890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 358.8475646972656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 358.9357604980469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 359.07073974609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 359.0049743652344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 359.2222900390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 358.8511962890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 359.1153564453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 358.9305419921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 358.9937744140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 358.8729248046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 359.0776062011719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.967041015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 358.9983825683594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 358.8105773925781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 358.9756164550781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 358.9476318359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 359.0381164550781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 358.8799743652344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 359.1327819824219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 358.932861328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.8901062011719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.99969482421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 358.64501953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 359.00628662109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 359.07843017578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 358.9742126464844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 358.890380859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 358.9109191894531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 358.8349914550781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 359.0514221191406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 358.93682861328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 359.0314025878906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 359.1197814941406
############ Running episode number: 884  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.8611145019531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 358.9643859863281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 359.1507263183594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 358.96295166015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 358.9178466796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 359.0046081542969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 359.0059814453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 359.06396484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 358.8836975097656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 358.9644775390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 358.88720703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 358.931640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 358.9903259277344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.8846740722656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 359.1660461425781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 359.0912170410156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 359.05804443359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 358.79052734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 359.23455810546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 358.8121032714844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 358.9676818847656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 358.8203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 358.9962158203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 358.9502258300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 358.930419921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 358.93310546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 358.9873046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 359.2102355957031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 359.05377197265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 358.7181091308594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 358.9862060546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 358.83148193359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 358.7160949707031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 359.0670471191406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 358.86114501953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 358.91754150390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 358.88922119140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 359.01654052734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 358.9347229003906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 359.0776062011719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 359.1792297363281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 358.8756408691406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 359.0638427734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 359.1184387207031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 358.93408203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 358.8514709472656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 358.8857421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 358.9222106933594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 359.003662109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 358.98895263671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 358.99365234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 359.1423645019531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 359.1076354980469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 359.0389099121094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 358.7842712402344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 358.9298095703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 358.677490234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 358.8224792480469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 359.0807800292969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 358.926025390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 358.8829040527344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 359.01629638671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 359.2063293457031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 358.90374755859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 358.89605712890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 358.9737854003906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 359.0005798339844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 358.8644714355469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 359.0304260253906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 358.9501953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 358.8628234863281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 358.92291259765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 358.8489685058594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 358.98675537109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 358.8749694824219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 358.91375732421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 359.02838134765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 359.0197448730469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.82061767578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 358.85870361328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 359.06976318359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 359.1221923828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 359.058349609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 359.1432800292969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 358.8749694824219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.833251953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 358.8463439941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 359.03057861328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.8477478027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 358.9374084472656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 359.0289611816406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 359.071533203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 358.76031494140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 358.97430419921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 359.00274658203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 358.9718322753906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 358.881591796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 358.8250732421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 359.16436767578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 359.0590515136719
############ Running episode number: 885  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.89544677734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 358.94451904296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 358.9605407714844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 358.94622802734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 359.00775146484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 359.1059265136719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 358.9887390136719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 358.92626953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 358.96185302734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 358.8849792480469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 359.08929443359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 358.9551696777344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 359.0832214355469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.77398681640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 358.9483337402344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 358.89117431640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 358.90203857421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 358.9709167480469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 359.05902099609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 359.1336364746094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 358.6769104003906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 358.88671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 358.9584655761719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 358.8722839355469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 358.9329833984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 359.02587890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 358.9185485839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 358.8187255859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 359.1490478515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 358.79876708984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 358.8888854980469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 358.876708984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 358.96923828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 358.91082763671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 358.96270751953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 358.9414978027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 358.9002380371094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 359.0915832519531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 358.9493713378906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 358.7652587890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 359.1870422363281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 359.1372375488281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 359.1340637207031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 359.1563720703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 358.9271545410156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 358.9737243652344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 358.881591796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 358.8984680175781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 358.8152160644531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 358.7113952636719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 358.9412536621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 359.15972900390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 359.0298156738281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 359.0294494628906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 358.9245910644531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 359.00408935546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 358.7808532714844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 358.974853515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 358.9459533691406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 359.1358642578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 358.8646240234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 359.0443420410156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 358.94390869140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 358.9587707519531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 359.0840759277344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 358.9071960449219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 359.0277404785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 359.004150390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 359.1178894042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 359.0827331542969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 358.8900146484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 359.1315002441406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 359.0414123535156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 358.918701171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 359.1666259765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 359.01434326171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 358.8333435058594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 358.8569641113281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.7895812988281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 359.0368957519531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 358.92547607421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 359.1076354980469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 358.8661193847656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 358.912109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 358.799072265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 359.08978271484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 359.1433410644531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.6856384277344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 359.27362060546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 359.0733337402344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 358.9772644042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 358.8475341796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 359.0228271484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 359.152587890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 359.123291015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 359.1209716796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 358.9579772949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 358.7456970214844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 358.812255859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 358.7770080566406
############ Running episode number: 886  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.80029296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 359.11163330078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 358.97308349609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 358.708740234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 358.8871765136719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 359.24114990234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 358.8692932128906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 359.128173828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 358.8424072265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 358.9090576171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 359.0621337890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 358.8584289550781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 358.9518127441406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 359.0660095214844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 358.69610595703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 358.9410400390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 358.94390869140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 358.9199523925781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 358.86181640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 358.90447998046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 358.98175048828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 358.9953308105469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 358.8914794921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 359.0787048339844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 358.9947509765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 358.8068542480469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 358.8190002441406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 359.0964660644531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 358.7916259765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 358.85479736328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 359.01788330078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 358.979248046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 358.9901123046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 359.1445007324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 358.8401184082031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 359.1189270019531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 359.0830383300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 359.1389465332031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 358.8687438964844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 358.8985595703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 358.9528503417969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 358.91717529296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 359.14459228515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 359.0412902832031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 358.9491271972656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 358.99981689453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 359.0550231933594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 359.00225830078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 359.05780029296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 358.8929748535156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 359.018798828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 358.7639465332031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 359.1388854980469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 359.0858154296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 358.9278564453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 359.1170959472656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 358.9798583984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 358.9465637207031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 358.82244873046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 359.0889892578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 358.9114074707031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 358.8981018066406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 358.6450500488281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 358.98284912109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 358.73828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 359.16375732421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 358.989013671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 359.1507873535156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 358.9223937988281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 358.7889709472656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 358.71710205078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 359.2923278808594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 359.0768737792969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 358.9710388183594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 358.98602294921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 359.09619140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 358.9007263183594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 359.0250549316406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.9134521484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 358.9151611328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 358.849609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 358.80169677734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 359.0528869628906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 358.9658508300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 359.03033447265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.9360656738281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 359.0836181640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.86376953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.7966613769531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 358.8525390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 358.8031311035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 358.92523193359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 358.8417663574219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 359.1012268066406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 358.8125915527344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 358.9349365234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 359.00311279296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 358.8770751953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 359.0438232421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 358.9442443847656
############ Running episode number: 887  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.9774475097656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 358.9437255859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 359.04217529296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 358.8637390136719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 358.7523193359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 358.84222412109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 358.9413146972656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 358.88702392578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 358.84051513671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 359.0151062011719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 358.8646240234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 359.1064758300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 359.1651916503906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.9524841308594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 358.8465881347656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 358.87164306640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 359.0364074707031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 359.0205383300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 359.0927734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 358.96258544921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 358.9098205566406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 358.8580322265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 358.9778747558594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 359.1131896972656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 359.1756286621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 358.81536865234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 359.0675048828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 359.008056640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 358.9961853027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 358.85198974609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 358.92230224609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 359.0039978027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 358.8802795410156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 358.9675598144531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 358.7132873535156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 358.9056701660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 359.0809020996094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 359.1164245605469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 358.8552551269531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 358.8912353515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 358.9598693847656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 358.8843994140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 358.9936828613281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 358.88861083984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 359.03216552734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 359.0242919921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 358.7903747558594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 359.04248046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 359.118408203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 359.1103515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 358.835693359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 359.0970458984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 358.9059753417969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 358.99835205078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 358.6897888183594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 359.0628967285156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 358.7838439941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 359.04022216796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 359.0763854980469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 358.939208984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 358.8937072753906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 359.0106506347656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 358.99395751953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 358.9804992675781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 359.0104675292969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 358.9422912597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 358.92633056640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 358.9043273925781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 358.832275390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 359.14111328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 359.1826477050781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 359.0584716796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 359.06304931640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 359.2391357421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 359.00799560546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 358.9490661621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 358.9972839355469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 359.12200927734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 359.1025390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 359.0291442871094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 358.8809814453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 358.88983154296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 358.93035888671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 359.0836486816406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 358.9510803222656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 359.1891784667969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 359.0657653808594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.8846130371094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 359.0247497558594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 358.9568786621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 359.1656799316406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 358.93292236328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 358.91729736328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 358.850830078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 358.8959655761719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 358.8452453613281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 358.79766845703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 358.9084777832031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 358.9194030761719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 358.89068603515625
############ Running episode number: 888  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.9266052246094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 359.0252380371094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 359.2351379394531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 358.9085388183594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 358.8809814453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 358.69403076171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 358.83782958984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 358.86956787109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 358.9495544433594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 359.1034851074219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 358.8028564453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 358.874267578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 358.9991149902344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.9483642578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 358.97894287109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 358.94989013671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 358.8676452636719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 358.962890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 358.7994689941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 358.8367919921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 358.92315673828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 359.1731872558594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 359.10418701171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 359.046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 358.9300537109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 359.01116943359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 358.9861755371094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 358.9765319824219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 359.1147155761719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 358.96270751953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 358.7941589355469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 358.92010498046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 358.7901611328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 358.9258728027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 359.03216552734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 358.8974609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 358.6238708496094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.83941650390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 358.71392822265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 358.89630126953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 358.9698181152344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 358.96563720703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 358.9007873535156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 359.0572814941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 358.92315673828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 358.9248046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 358.693115234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 359.0712585449219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 358.7962951660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 358.9258728027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 358.959716796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 359.1982727050781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 358.91363525390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 358.7170715332031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 358.9692077636719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 358.82037353515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 358.8988037109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 358.643310546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 359.1865234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 358.81121826171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 358.8244323730469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 358.9011535644531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 359.27044677734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 358.9342041015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 359.14105224609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 358.7960510253906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 358.94659423828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 358.8664855957031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 358.9472351074219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 358.8393249511719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 359.03753662109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 358.9803771972656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 359.0307922363281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 359.05877685546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 359.02911376953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 358.94586181640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 359.1224365234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 358.7894287109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 359.0553283691406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 358.97320556640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 358.8544616699219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 359.0581970214844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 359.06915283203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 358.7447509765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 359.0675964355469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 359.118408203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 358.93414306640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.88739013671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.9199523925781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 359.01690673828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 359.0413818359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 358.9833679199219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 359.1349792480469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 358.6377258300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 359.0147399902344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 358.80572509765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 358.8972473144531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 359.107421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 359.0950927734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 359.0597229003906
############ Running episode number: 889  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.99951171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 359.0106201171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 359.0597229003906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 359.00225830078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 359.1228942871094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 358.81317138671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 358.78179931640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 358.5821838378906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 359.1684875488281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 359.21258544921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 358.90582275390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 358.9706726074219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 359.0405578613281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.9224853515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 359.32635498046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 358.96441650390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 358.7873229980469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 358.77349853515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 358.8669128417969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 358.8938903808594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 359.0247497558594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 359.0544128417969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 358.9960632324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 358.9806213378906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 359.0574951171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 358.87237548828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 358.8416442871094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 359.202880859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 358.87054443359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 358.9547424316406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 359.11749267578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 359.0056457519531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 359.0743713378906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 358.92108154296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 358.8699951171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 359.08319091796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 358.9581298828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.939208984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 358.7856140136719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 359.10894775390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 358.7854919433594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 358.9715576171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 358.8453063964844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 358.9960632324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 358.93646240234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 358.7956848144531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 358.95635986328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 358.8014221191406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 359.24029541015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 359.06536865234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 359.0998229980469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 359.0784606933594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 358.93341064453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 359.44140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 358.9328308105469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 359.0640869140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 358.90362548828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 359.2191162109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 358.7408447265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 359.05865478515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 358.9829406738281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 359.0283508300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 359.08441162109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 358.78240966796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 359.0160827636719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 359.00103759765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 358.9139099121094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 359.1294250488281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 358.74102783203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 358.9693603515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 359.0605773925781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 358.9217224121094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 358.7613220214844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 359.0022277832031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 359.0404052734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 358.9351806640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 358.734130859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 358.92987060546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.9111633300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 358.9503173828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 358.9481201171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 358.976806640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 358.9305419921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 358.66717529296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 358.84442138671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 359.01171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 359.1692199707031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.95611572265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.9720458984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 358.6719665527344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 359.0895080566406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 358.96649169921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 359.0960388183594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 358.78607177734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 358.94158935546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 358.9228210449219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 358.8842468261719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 359.1346435546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 359.0123291015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 358.9682922363281
############ Running episode number: 890  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.88421630859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 358.93255615234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 358.982421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 358.939208984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 359.04119873046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 358.901611328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 359.0965576171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 358.7931213378906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 358.9391174316406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 358.93682861328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 358.7487487792969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 358.763671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 358.8849792480469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.9365539550781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 358.91552734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 359.1180114746094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 359.0089111328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 358.9812927246094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 359.2039489746094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 358.9364013671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 359.0944519042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 358.97119140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 358.9687194824219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 358.9356384277344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 359.0094299316406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 358.9493103027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 359.0021057128906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 359.1186218261719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 358.9460144042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 359.0191955566406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 358.9035949707031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 358.8132629394531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 358.8328552246094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 359.0311279296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 358.9453430175781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 359.0631408691406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 358.81671142578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.9654235839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 358.9881591796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 359.0489501953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 358.74517822265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 358.9295959472656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 359.0987854003906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 358.739990234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 359.020263671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 358.8172302246094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 359.02557373046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 358.8668518066406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 358.87457275390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 359.1181335449219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 358.8490905761719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 358.9718933105469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 359.012939453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 358.78094482421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 359.10980224609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 359.0415344238281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 358.8582458496094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 358.76153564453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 358.9437561035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 359.067138671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 359.0111083984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 359.0658874511719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 358.99774169921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 358.9596862792969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 359.0242004394531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 358.9326477050781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 358.91302490234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 359.05914306640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 358.90411376953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 358.95123291015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 358.9122314453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 358.92535400390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 358.9920654296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 358.9738464355469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 358.90185546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 358.8171081542969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 358.89093017578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 358.9276428222656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 359.03717041015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 359.0040588378906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 358.8272705078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 358.8143005371094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 358.9112854003906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 359.1425476074219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 358.80242919921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.86761474609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 359.0865478515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.7367858886719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 359.0725402832031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 358.8477478027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 359.03619384765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 358.9372253417969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 358.9307556152344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 358.9036560058594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 358.6998596191406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 358.82598876953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 359.0128173828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 358.70782470703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 358.9156494140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 358.8499450683594
############ Running episode number: 891  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 359.0208435058594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 359.06378173828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 358.88238525390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 358.9479675292969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 358.9952392578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 358.91143798828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 358.79718017578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 359.1544189453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 359.01861572265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 358.78497314453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 359.0610656738281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 359.0252685546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 358.9608154296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.8868408203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 359.0616149902344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 358.9747314453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 358.87750244140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 358.9261779785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 358.98046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 358.80181884765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 358.8902282714844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 358.7701110839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 359.1712646484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 358.9143371582031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 358.8332824707031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 358.8191223144531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 358.8072204589844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 358.8451232910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 359.1017150878906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 359.0246887207031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 359.0181884765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 358.8426513671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 359.15155029296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 358.98321533203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 359.2173156738281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 358.8929138183594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 359.0367736816406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.9254455566406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 359.1311340332031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 359.05157470703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 358.859619140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 359.0274353027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 358.8796081542969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 358.9935607910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 358.78680419921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 358.72027587890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 358.8984680175781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 359.0021057128906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 358.8829040527344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 358.87884521484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 358.9198303222656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 358.88427734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 358.97021484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 358.95770263671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 358.8530578613281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 358.9405822753906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 358.92864990234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 359.029052734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 358.8033752441406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 358.87847900390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 358.9300842285156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 358.951904296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 358.95367431640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 358.93548583984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 359.0208740234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 358.8499755859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 359.18914794921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 358.779052734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 359.05364990234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 358.7566833496094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 358.99395751953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 359.0718994140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 358.94366455078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 359.0439147949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 358.81793212890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 359.04266357421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 358.8508605957031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 358.8371887207031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.7498474121094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 358.9366149902344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 358.92669677734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 358.9429931640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 359.148681640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 359.1397705078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 359.0397644042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 359.0041809082031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 358.8068542480469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.976806640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.8382263183594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 358.81597900390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 358.78887939453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 359.08544921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 359.163818359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 358.889892578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 358.82470703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 359.0405578613281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 358.85455322265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 358.968994140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 359.05267333984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 359.1660461425781
############ Running episode number: 892  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.8858642578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 358.9721984863281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 358.81304931640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 358.8528747558594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 358.9359130859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 359.0302429199219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 358.87567138671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 358.8667297363281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 358.9018859863281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 359.23199462890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 358.96435546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 359.0242004394531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 358.94842529296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.7882995605469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 359.09796142578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 359.0182189941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 359.0346374511719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 358.9283752441406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 358.9415588378906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 358.9382629394531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 358.8397521972656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 358.96978759765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 358.94427490234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 359.0346984863281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 358.92791748046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 358.9718933105469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 359.0644836425781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 358.96002197265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 358.9163513183594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 358.7713928222656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 358.7123107910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 358.9313659667969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 358.9382019042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 359.22662353515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 358.9607238769531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 359.0695495605469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 358.9187927246094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.9091796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 359.0860900878906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 358.6371154785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 359.0417175292969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 358.94140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 358.9825439453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 358.8992614746094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 358.95123291015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 358.72991943359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 359.0023498535156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 358.91448974609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 359.0270080566406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 359.0695495605469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 358.81103515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 358.7974548339844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 359.0598449707031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 359.29180908203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 358.89990234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 358.95281982421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 358.714599609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 358.9759216308594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 358.85040283203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 359.1233825683594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 359.00360107421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 359.1630859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 358.986083984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 358.9905700683594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 358.9387512207031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 359.16766357421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 358.8771057128906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 358.979736328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 358.8136291503906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 358.9227294921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 358.99993896484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 359.0028991699219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 358.78436279296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 358.9974670410156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 358.865478515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 358.9530944824219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 359.1832275390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 359.00665283203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 359.2013854980469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 358.8654479980469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 359.24530029296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 359.1634521484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 358.8324890136719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 359.1477355957031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 359.166015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.9589538574219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 358.9636535644531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.8493957519531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.8057861328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 358.8623352050781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 358.9622497558594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 358.873779296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 358.7999572753906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 359.1427917480469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 358.8185119628906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 358.6671142578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 358.8910217285156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 358.8504333496094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 358.96978759765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 358.67913818359375
############ Running episode number: 893  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.75048828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 359.0083923339844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 358.9122009277344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 358.8370666503906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 358.7259521484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 359.05804443359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 358.9966125488281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 358.8891296386719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 358.85076904296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 358.8822021484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 358.8369445800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 359.1000061035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 358.8846740722656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 359.0216064453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 359.0715637207031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 358.968017578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 358.8438415527344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 359.0058898925781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 358.90985107421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 359.0016784667969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 358.9430236816406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 359.00677490234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 359.03485107421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 358.5772399902344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 358.9465637207031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 359.0391845703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 359.24273681640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 359.14312744140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 358.8779602050781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 359.0345764160156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 358.8592529296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 358.95025634765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 359.04290771484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 358.99407958984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 358.8792419433594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 359.0296936035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 358.89263916015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 359.05364990234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 358.8661193847656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 359.0234680175781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 358.7958068847656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 358.75347900390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 359.0057067871094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 359.01641845703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 359.0091857910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 359.0638732910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 358.90643310546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 358.9786682128906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 358.7775573730469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 359.08990478515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 358.9208679199219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 358.96136474609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 359.21307373046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 359.1448059082031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 358.8854064941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 358.80322265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 358.86871337890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 358.7933349609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 359.19903564453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 358.86492919921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 358.95050048828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 358.8753967285156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 358.8453063964844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 359.1915283203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 359.0528564453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 358.7051696777344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 358.7615051269531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 358.9345397949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 358.8903503417969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 358.8443603515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 358.92578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 358.9348449707031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 358.9112243652344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 358.95587158203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 358.9822998046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 359.13916015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 359.1129455566406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 359.0672912597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 359.0455017089844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 359.1581726074219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 359.05438232421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 358.68890380859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 358.998779296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 359.07818603515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 358.8073425292969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.8638916015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 358.88775634765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 359.0267639160156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.9028625488281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 359.01251220703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 359.0350036621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 358.8957214355469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 359.0913391113281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 359.1824645996094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 358.5709533691406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 359.13580322265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 358.8316955566406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 358.8824768066406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 358.9039001464844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 358.9710998535156
############ Running episode number: 894  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 359.01324462890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 358.9244079589844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 359.02197265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 359.0572814941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 359.0693359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 359.11688232421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 358.6734619140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 359.04986572265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 358.8639831542969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 358.9656982421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 359.02056884765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 358.93743896484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 359.10003662109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.904296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 359.0303649902344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 359.2113342285156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 359.10552978515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 358.9978942871094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 358.9291687011719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 358.84478759765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 358.9176025390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 358.8982238769531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 358.9616394042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 358.8706970214844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 358.9190673828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 358.9749450683594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 358.88848876953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 359.14569091796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 358.9517822265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 359.10833740234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 358.8898010253906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 358.85650634765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 359.00787353515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 359.3172912597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 358.89068603515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 359.015380859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 358.8703918457031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.9544677734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 359.1653137207031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 358.9285888671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 359.1208190917969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 359.0111389160156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 358.97186279296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 359.0704345703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 359.0216979980469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 359.07806396484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 358.9034118652344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 359.1256103515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 359.05615234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 358.9392395019531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 358.8368225097656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 358.53936767578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 359.12506103515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 358.867431640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 359.10174560546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 358.7575988769531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 358.9613342285156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 358.894287109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 358.90478515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 359.0711669921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 358.97235107421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 358.8654479980469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 358.7934265136719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 358.82232666015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 358.90087890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 359.02838134765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 359.11431884765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 359.1257629394531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 359.05145263671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 359.0184020996094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 358.96234130859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 358.9006652832031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 359.08447265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 358.98284912109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 359.0279846191406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 359.17120361328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 359.10198974609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 358.902587890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 359.1650390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 358.9538269042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 359.2098693847656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 358.9808044433594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 358.87066650390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 358.965576171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 359.0412902832031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 359.0198059082031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 358.8432922363281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.8642578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.9896240234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 359.03228759765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 359.0291748046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 358.90032958984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 358.8719482421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 358.9581298828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 359.02569580078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 358.9076232910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 359.01373291015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 358.8494873046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 359.0636901855469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 358.8907165527344
############ Running episode number: 895  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.9068298339844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 359.0638732910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 358.85394287109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 359.0926513671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 358.93597412109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 359.1329040527344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 358.8594665527344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 358.8518981933594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 359.05596923828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 358.9362487792969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 359.10516357421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 358.98681640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 359.0531311035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.9356689453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 358.8935852050781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 358.8653259277344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 358.91253662109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 358.8438415527344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 358.9460754394531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 358.93365478515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 358.9534606933594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 358.9282531738281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 359.0931701660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 358.7928161621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 358.8809509277344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 358.9634094238281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 359.1518859863281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 358.8364562988281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 358.7901611328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 359.03497314453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 359.03057861328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 359.1183166503906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 358.95416259765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 358.8608093261719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 358.8660888671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 359.170654296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 358.97088623046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.70648193359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 358.9608459472656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 358.7707214355469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 359.0635681152344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 358.8473205566406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 358.8448791503906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 358.62713623046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 358.9891052246094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 359.0409240722656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 358.9588928222656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 358.984619140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 358.9849548339844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 358.73101806640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 358.8782958984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 358.8210144042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 359.0813293457031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 358.85382080078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 358.88421630859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 358.9577941894531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 358.8924255371094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 359.07073974609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 359.0208740234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 359.0347595214844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 359.1362609863281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 359.1537780761719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 359.08636474609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 358.857421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 359.0514831542969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 358.95068359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 359.0879821777344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 359.0005187988281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 358.8191833496094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 358.9846496582031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 358.8976135253906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 358.9141845703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 359.16094970703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 358.950439453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 359.0069580078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 359.1965637207031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 358.857177734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 359.1802673339844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.9561462402344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 358.9329528808594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 359.0770263671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 359.0452880859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 359.12261962890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 359.02484130859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 358.9933166503906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.9538269042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 358.9692687988281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.79656982421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 359.1626892089844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 359.1620178222656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 359.21112060546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 359.0154113769531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 359.1691589355469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 358.8889465332031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 358.78125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 358.9762878417969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 358.8079528808594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 359.0352478027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 358.9187316894531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 358.8201599121094
############ Running episode number: 896  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.7576904296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 359.11956787109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 358.81512451171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 358.8047790527344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 358.872314453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 358.9052429199219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 358.93499755859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 358.84881591796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 358.922607421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 358.9084777832031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 358.9974365234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 359.3563232421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 358.8826599121094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 359.0944519042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 358.79998779296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 359.1246643066406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 359.01727294921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 358.83465576171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 358.9258117675781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 358.81915283203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 359.0531921386719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 358.9075622558594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 358.73724365234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 358.90106201171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 358.9252624511719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 358.8805847167969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 358.9863586425781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 358.8099670410156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 358.9144592285156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 358.85504150390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 359.0365295410156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 358.9733581542969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 359.080810546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 359.09197998046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 358.9684143066406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 358.9578552246094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 359.0672302246094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.80169677734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 359.200439453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 359.03192138671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 358.8897399902344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 359.0312194824219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 358.9284362792969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 358.7351989746094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 359.1218566894531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 358.90936279296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 358.8533630371094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 359.12774658203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 358.8006286621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 358.5159912109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 359.18878173828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 359.0292663574219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 358.887939453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 358.97735595703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 358.9957275390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 358.8879699707031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 358.9170837402344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 359.06683349609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 358.8421630859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 359.118408203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 358.9576416015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 359.0209045410156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 358.8964538574219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 358.86639404296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 358.8862609863281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 358.91259765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 359.0113830566406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 359.0274963378906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 358.9649353027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 359.0406494140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 359.05718994140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 358.9490661621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 358.9640808105469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 358.9613952636719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 358.9410400390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 358.8428955078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 358.72113037109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 358.9014892578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.82879638671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 358.8909912109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 358.869140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 358.812744140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 359.0113830566406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 359.0102233886719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 359.0010681152344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.8203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 358.80804443359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 359.03997802734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.9236145019531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 358.8864440917969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 359.0918884277344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 358.82086181640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 358.8768615722656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 358.9974365234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 358.926025390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 359.100830078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 359.1518249511719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 358.6678161621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 358.9016418457031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 358.744140625
############ Running episode number: 897  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.8837890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 358.4964904785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 358.7849426269531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 359.0733642578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 359.06744384765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 358.8124084472656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 358.99127197265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 358.9945068359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 358.9268493652344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 358.8876037597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 358.9923400878906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 358.9847717285156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 359.0909118652344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 359.02655029296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 358.9599609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 358.95623779296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 359.1759948730469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 358.8881530761719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 359.11187744140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 358.9280700683594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 359.10003662109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 359.099609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 358.9417724609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 358.8936462402344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 358.82830810546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 358.9396667480469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 359.3199462890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 359.17071533203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 358.8503723144531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 358.7819519042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 359.1624755859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 358.8977966308594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 358.99993896484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 358.8189697265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 358.97613525390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 358.8485412597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 358.9735107421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.95172119140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 358.98809814453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 358.8716125488281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 358.83868408203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 358.9229736328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 359.0076904296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 358.82415771484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 358.9451904296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 359.0369873046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 358.9625549316406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 359.1132507324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 358.71551513671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 358.9562072753906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 359.0489501953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 358.98516845703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 359.0041198730469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 358.9462890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 358.9660949707031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 358.8472595214844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 358.9743347167969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 358.9202575683594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 358.9429016113281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 359.1763610839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 358.7619323730469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 358.8644104003906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 358.9359436035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 358.912109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 359.0461120605469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 359.1885070800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 359.1919860839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 359.072998046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 359.050048828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 359.11090087890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 358.9750671386719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 358.9073181152344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 358.94659423828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 359.0302429199219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 359.1061096191406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 359.0271301269531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 358.963623046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 358.990478515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.80419921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 359.0382995605469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 358.8541259765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 358.88641357421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 358.7928161621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 359.110107421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 358.89910888671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.97015380859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 358.8802795410156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.99822998046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 359.0126953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 358.8315124511719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 358.8955993652344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 359.0495910644531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 359.0980529785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 359.04486083984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 359.2547302246094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 359.0960693359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 358.7944030761719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 359.0658874511719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 358.84539794921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 359.0391540527344
############ Running episode number: 898  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 359.1798095703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 358.7370910644531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 358.7872314453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 359.0135498046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 358.9076232910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 359.068359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 358.9764709472656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 358.82867431640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 358.8898010253906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 358.9267883300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 358.9786071777344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 358.8929748535156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 359.0072021484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.9062194824219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 358.9135437011719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 359.1374816894531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 358.9430847167969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 358.6918640136719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 358.78857421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 359.12567138671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 358.6786193847656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 359.002197265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 359.2746887207031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 359.0460510253906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 358.9547119140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 359.00335693359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 358.9981384277344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 358.8602600097656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 358.9095153808594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 359.1184387207031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 358.6831359863281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 358.6564025878906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 358.86639404296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 358.9697265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 358.8338317871094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 359.0128173828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 358.85906982421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 359.050048828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 358.8641357421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 358.9886169433594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 359.0804138183594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 358.96514892578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 358.8888854980469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 358.7575378417969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 358.9905700683594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 358.9734802246094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 359.0009460449219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 359.0703430175781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 359.08026123046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 359.15838623046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 358.82275390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 359.0279541015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 358.95147705078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 358.85430908203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 358.9066467285156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 359.08685302734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 358.8796691894531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 358.83428955078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 359.0394287109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 359.2061767578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 358.983154296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 358.8298034667969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 358.93328857421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 358.7664794921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 358.8431091308594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 359.1167907714844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 359.0311279296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 359.1993103027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 358.71905517578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 358.9736022949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 359.014404296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 358.8406677246094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 359.08221435546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 359.1470947265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 358.8861389160156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 358.9547424316406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 359.08843994140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 358.9353332519531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.7957458496094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 358.96685791015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 359.0814208984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 359.0516052246094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 358.9365539550781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 359.0337829589844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 359.0556945800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.9005432128906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 358.88885498046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.9365234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.4851379394531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 359.0120849609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 358.8119812011719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 358.8583679199219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 358.938232421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 359.02777099609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 358.9938049316406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 358.8191223144531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 358.82781982421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 358.93560791015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 359.03448486328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 359.07684326171875
############ Running episode number: 899  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.9739685058594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 359.0081787109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 358.88323974609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 358.935302734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 359.12078857421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 358.8241271972656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 358.85589599609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 358.6805419921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 359.0213928222656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 359.12042236328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 359.0367736816406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 359.06939697265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 358.956787109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.8202819824219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 358.7214050292969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 359.01348876953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 358.7955627441406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 358.7541809082031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 359.05670166015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 358.9787902832031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 358.8476867675781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 359.048095703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 358.96923828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 358.8851623535156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 358.9846496582031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 358.9133605957031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 359.17376708984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 359.09442138671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 359.07135009765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 359.1033020019531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 358.8835754394531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 359.0071105957031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 359.002197265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 358.9001159667969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 358.9131164550781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 358.83624267578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 359.0733947753906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.92987060546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 358.9343566894531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 359.08758544921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 358.6571044921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 359.05780029296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 358.79071044921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 358.75885009765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 359.03314208984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 358.9758605957031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 358.99346923828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 359.0253601074219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 359.0506591796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 359.0860900878906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 359.0728759765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 359.03533935546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 358.9216613769531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 358.93951416015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 359.0121765136719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 358.89801025390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 358.87664794921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 359.1049499511719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 359.01025390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 358.9320068359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 358.9072265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 358.85223388671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 359.0677185058594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 358.84283447265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 359.0640869140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 358.90618896484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 358.97491455078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 358.88037109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 359.10614013671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 359.1473693847656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 358.8004150390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 358.9488220214844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 358.999755859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 358.96929931640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 358.56817626953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 358.9153137207031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 359.06500244140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 359.0111083984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.85736083984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 358.9907531738281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 359.09124755859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 359.05987548828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 359.06292724609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 358.78302001953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 358.9758605957031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.8166198730469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 358.8116760253906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.8716125488281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.9549865722656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 358.90948486328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 359.00408935546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 358.989990234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 359.1590881347656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 358.9384765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 359.0115966796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 358.8888854980469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 359.0231628417969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 358.8470458984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 358.777587890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 359.0435485839844
############ Running episode number: 900  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.9441223144531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 359.070556640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 359.0212707519531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 358.9425048828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 359.0579833984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 359.1161193847656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 358.9665832519531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 358.8923034667969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 358.985595703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 359.2413635253906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 358.8541259765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 358.927734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 358.7492370605469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 359.1233825683594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 359.0015869140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 358.98541259765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 358.9486389160156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 358.8668212890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 358.96392822265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 359.08038330078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 358.80517578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 359.05108642578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 358.89447021484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 359.08685302734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 358.9189453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 358.7611389160156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 359.0387268066406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 359.104248046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 358.9523620605469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 358.91619873046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 358.95556640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 359.0981140136719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 358.7326354980469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 358.8187255859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 359.017333984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 358.7823181152344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 358.67156982421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.7570495605469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 358.8453369140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 358.864990234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 358.93756103515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 358.9418029785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 359.04443359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 358.99920654296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 359.1380920410156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 358.91973876953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 358.85284423828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 358.8956604003906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 359.024658203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 358.9829406738281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 358.8427734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 359.12933349609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 358.8555603027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 358.9162292480469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 358.7872009277344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 358.810302734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 358.94146728515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 358.8815612792969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 358.9144592285156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 358.9908142089844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 359.0692443847656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 359.1504821777344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 358.9278564453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 359.1033630371094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 358.8731689453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 358.7872619628906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 358.9793701171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 358.8656005859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 358.8294372558594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 359.18243408203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 359.1607360839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 358.8991394042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 358.923828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 359.1558837890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 359.0398864746094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 358.9266052246094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 359.0464782714844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 359.01654052734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.7508544921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 358.9940185546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 359.1929016113281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 358.85455322265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 358.8748474121094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 358.87579345703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 359.0218505859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.8164978027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 358.5964050292969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.9185791015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.9743957519531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 358.97283935546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 359.02044677734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 358.83233642578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 359.0842590332031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 358.7545166015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 358.74395751953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 358.8323669433594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 358.84112548828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 358.9403381347656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 358.82952880859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 358.83502197265625
############ Running episode number: 901  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.8651123046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 359.04205322265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 359.05682373046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 358.9237060546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 358.85125732421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 359.0445861816406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 358.97418212890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 359.0684814453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 359.2475280761719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 358.9576721191406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 358.8200378417969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 358.7725830078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 359.1027526855469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.73406982421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 359.0494384765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 358.92999267578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 358.9537048339844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 359.09246826171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 358.7890930175781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 359.0617370605469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 358.890380859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 358.9324645996094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 359.0404968261719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 359.0211486816406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 359.12347412109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 358.8603515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 358.8409118652344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 359.0435485839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 358.94830322265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 359.0663146972656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 359.03955078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 358.8918762207031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 358.78021240234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 358.99462890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 358.9226379394531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 358.7968444824219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 358.95782470703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 359.0086975097656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 358.8726501464844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 359.0195007324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 358.8833312988281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 358.97174072265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 358.9435729980469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 358.923583984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 358.9301452636719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 358.8486022949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 359.111328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 358.8829040527344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 358.958740234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 358.9533996582031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 358.9515380859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 358.9288024902344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 359.0160217285156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 359.03033447265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 358.9576110839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 359.0475158691406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 358.96026611328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 358.9907531738281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 359.22528076171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 358.81591796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 359.1867980957031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 358.8846130371094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 358.8653564453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 359.0612487792969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 359.1115417480469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 358.9012451171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 358.9397888183594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 358.8795471191406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 359.1012268066406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 358.7315673828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 359.0528869628906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 358.9476013183594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 359.0011291503906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 359.3372802734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 358.9789733886719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 358.7177429199219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 358.9757385253906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 358.9434509277344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.9456481933594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 358.7738342285156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 359.05584716796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 358.81793212890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 358.9822692871094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 358.9464111328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 359.1158142089844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.92401123046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 358.9216003417969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 359.03851318359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.8941650390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 359.21044921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 358.8614196777344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 359.1034851074219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 358.700439453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 358.9051513671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 359.1086730957031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 358.8282775878906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 358.8299560546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 359.0868835449219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 358.90545654296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 358.7984619140625
############ Running episode number: 902  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.99908447265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 358.8944091796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 358.9779357910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 358.8472900390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 359.0376892089844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 359.0007019042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 358.9521179199219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 358.79595947265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 358.924072265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 358.84088134765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 359.0210266113281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 359.01470947265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 359.05419921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.99908447265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 358.7789001464844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 358.85284423828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 359.0004577636719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 358.9606018066406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 358.9501037597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 358.9285888671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 358.92840576171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 358.8008728027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 359.00543212890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 358.9089660644531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 359.13983154296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 358.86102294921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 359.0353088378906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 359.0028381347656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 358.96148681640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 358.8583984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 358.781005859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 358.90203857421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 358.8496398925781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 358.9528503417969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 358.95318603515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 359.0657043457031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 359.0164489746094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.9799499511719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 358.83203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 358.9447937011719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 358.82843017578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 358.88031005859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 359.2666320800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 358.762451171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 358.8963928222656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 358.94732666015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 359.1527099609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 359.0184326171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 359.1711730957031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 358.8413391113281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 359.0176696777344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 359.1081848144531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 359.0465087890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 358.91033935546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 359.0077209472656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 358.99151611328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 359.0717468261719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 359.0421447753906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 358.8309326171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 358.79974365234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 359.23712158203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 358.9476623535156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 358.8481750488281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 358.8336486816406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 359.057861328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 359.1739196777344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 359.1360778808594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 359.0855712890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 359.0677795410156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 358.9733581542969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 358.9200744628906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 359.0896301269531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 359.046142578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 359.1463623046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 358.8259582519531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 359.00018310546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 358.9173889160156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 359.1156921386719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.7315368652344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 359.0913391113281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 358.93328857421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 358.9207763671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 358.8381652832031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 358.7982177734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 358.8704833984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.92254638671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 358.87823486328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 359.061767578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.987060546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 359.00103759765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 358.93994140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 359.1122741699219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 359.007568359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 358.86865234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 359.0126037597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 358.7996826171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 358.7838439941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 358.91925048828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 359.1253356933594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 359.04632568359375
############ Running episode number: 903  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.7942810058594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 359.17889404296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 359.1920471191406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 358.97869873046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 359.11346435546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 358.9454650878906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 359.13311767578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 358.9556579589844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 358.95941162109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 358.94677734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 358.8866882324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 359.0543212890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 358.85638427734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.8898620605469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 358.6903076171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 359.1106262207031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 358.91827392578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 359.134033203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 359.0579528808594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 359.18896484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 358.9428405761719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 359.0280456542969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 359.1297302246094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 358.8523254394531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 359.00775146484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 359.2478942871094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 359.1023864746094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 359.2170104980469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 359.0421447753906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 358.7786560058594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 359.0352783203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 358.85174560546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 358.9468078613281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 358.908447265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 358.79168701171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 358.9701232910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 358.99658203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.9586181640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 359.0062255859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 358.99798583984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 359.1611022949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 358.8706970214844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 359.039306640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 359.0328674316406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 358.89935302734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 358.9056701660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 359.0108642578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 358.6603698730469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 358.8203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 358.7141418457031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 359.2174377441406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 358.93695068359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 358.955810546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 358.8494873046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 359.04852294921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 358.90582275390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 358.91290283203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 358.9825744628906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 359.11785888671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 359.15185546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 359.0546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 358.85931396484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 358.8921813964844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 358.7812805175781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 358.9839782714844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 358.9815673828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 359.0135803222656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 358.8449401855469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 359.1126403808594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 358.9787292480469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 358.86676025390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 359.03240966796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 358.69537353515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 359.1590576171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 358.8526611328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 358.9720764160156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 359.13897705078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 359.22216796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 359.05450439453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 358.83856201171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 358.9306335449219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 358.86260986328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 358.8692321777344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 358.938232421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 359.1181640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.7647399902344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 358.8523864746094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 359.1728210449219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.9955139160156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 359.180419921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 358.8562927246094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 359.0520324707031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 358.91424560546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 359.0032958984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 359.0875549316406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 359.0023193359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 359.0744934082031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 358.9924621582031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 359.0591735839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 358.6119689941406
############ Running episode number: 904  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.90655517578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 359.0079345703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 358.9482727050781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 358.9600830078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 358.7950744628906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 358.8398742675781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 359.0009460449219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 358.9571228027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 359.0395812988281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 358.9554443359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 358.9203186035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 358.7032165527344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 358.93145751953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 359.1075134277344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 358.9013977050781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 359.06793212890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 359.15289306640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 359.18389892578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 358.9396667480469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 358.8124694824219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 358.9608459472656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 359.1567077636719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 359.11883544921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 359.1053161621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 358.87921142578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 358.8529052734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 358.895751953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 358.8780822753906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 358.9589538574219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 359.09979248046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 359.01348876953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 358.8150634765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 358.8424987792969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 359.0765686035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 358.9562683105469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 359.10723876953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 358.77911376953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.9705810546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 358.9349060058594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 358.978759765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 358.7554931640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 358.7358093261719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 359.0158386230469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 358.771728515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 358.9730529785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 358.8822326660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 358.957763671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 358.7824401855469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 358.8038024902344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 358.92169189453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 359.1983337402344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 358.9983825683594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 359.1943054199219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 358.9761047363281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 359.0121765136719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 359.0902404785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 359.1008605957031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 358.98931884765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 358.99066162109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 358.8243103027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 359.1501159667969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 358.866943359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 358.7966003417969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 359.013427734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 358.9548645019531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 359.09716796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 359.0523986816406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 358.96258544921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 359.0838928222656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 359.0362548828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 359.02813720703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 358.8050231933594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 358.99090576171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 359.07672119140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 358.9073486328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 358.8427734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 359.02801513671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 359.080078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.9559020996094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 359.0536193847656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 358.9520263671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 358.9380798339844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 359.01214599609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 359.0588684082031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 359.12530517578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.9393310546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 359.0596618652344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.80316162109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.9530334472656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 358.85345458984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 358.8119812011719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 358.98773193359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 358.98699951171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 359.0687255859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 359.01812744140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 358.9920349121094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 358.8948059082031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 358.8950500488281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 359.088134765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 359.10260009765625
############ Running episode number: 905  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 359.03759765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 359.0132141113281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 359.1052551269531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 358.74200439453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 359.0140075683594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 359.04302978515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 358.9952392578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 358.96246337890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 358.9443359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 359.0118713378906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 358.9268493652344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 359.00250244140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 359.01214599609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 359.09423828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 358.91619873046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 358.6990051269531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 358.7528991699219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 358.9421081542969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 358.9477844238281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 358.9307556152344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 358.8030090332031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 358.8960876464844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 358.9053649902344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 359.05657958984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 358.8682556152344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 359.07843017578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 358.9203186035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 358.9351806640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 358.96429443359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 359.1624450683594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 358.9608459472656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 359.0675964355469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 358.9820861816406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 358.7294921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 359.03192138671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 359.0127258300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 359.0292663574219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.8693542480469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 359.14300537109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 359.06719970703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 359.1703796386719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 358.9759216308594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 358.8078918457031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 358.7401428222656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 358.9068603515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 358.9657897949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 358.8675537109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 359.0187072753906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 358.888916015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 358.94842529296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 359.0955810546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 359.0480041503906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 358.76336669921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 358.77825927734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 358.7618713378906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 358.9999084472656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 358.8614501953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 358.951416015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 359.0716552734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 358.85223388671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 359.0364074707031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 358.98858642578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 358.7135314941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 359.1739807128906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 358.94622802734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 358.760986328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 358.9805603027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 359.06390380859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 359.0732116699219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 359.21466064453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 358.9761047363281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 358.7144775390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 359.1827697753906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 359.0209655761719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 358.7495422363281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 359.1175231933594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 358.92218017578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 359.08819580078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.7987060546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 358.9722900390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 359.0233459472656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 358.8949890136719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 359.15771484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 359.0537109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 358.9354248046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.8353576660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 358.9682312011719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 359.0242004394531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.94415283203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 358.97930908203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 358.9366455078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 359.0509033203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 358.89813232421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 358.7928771972656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 358.90948486328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 358.93896484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 359.11871337890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 358.9278869628906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 358.82281494140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 358.9971618652344
############ Running episode number: 906  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.9274597167969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 358.9153137207031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 359.0561218261719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 358.97418212890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 358.93304443359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 358.9959411621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 359.1697082519531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 358.9142150878906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 359.02850341796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 358.96142578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 358.7557373046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 359.0469970703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 359.01361083984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.91693115234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 359.008544921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 358.91015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 359.1074523925781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 358.7540588378906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 358.9337158203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 358.98046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 358.86474609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 359.0850830078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 358.8792724609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 358.988037109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 359.1131591796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 358.997314453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 358.9060974121094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 359.1128234863281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 359.0458068847656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 359.2148742675781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 358.9103698730469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 358.6792297363281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 358.95391845703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 359.0710754394531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 359.05389404296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 359.0238952636719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 359.05316162109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.9320068359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 359.1395568847656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 358.8987121582031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 359.1374206542969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 359.03643798828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 359.049072265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 358.9261169433594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 358.9931945800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 358.9045715332031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 359.1094055175781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 359.0534362792969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 358.9596252441406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 359.153564453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 359.062255859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 359.0226745605469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 358.9205322265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 358.8443298339844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 358.833251953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 358.9095764160156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 359.014892578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 358.83111572265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 358.7744445800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 359.06512451171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 358.85833740234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 358.73956298828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 359.02642822265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 359.03839111328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 359.0616760253906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 358.93798828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 359.0355529785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 358.8311767578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 358.9916076660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 358.9954528808594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 358.8566589355469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 358.9185485839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 358.9431457519531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 358.9263916015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 359.0126647949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 359.10125732421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 359.0921630859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 358.968017578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 359.1192626953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 359.1729736328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 359.13824462890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 359.05316162109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 359.08953857421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 358.8704528808594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 358.7295837402344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.89910888671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 358.8786315917969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 359.00360107421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.8746337890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 358.888916015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 358.858154296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 358.7945861816406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 358.92852783203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 358.8431091308594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 358.82012939453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 358.914794921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 358.9009704589844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 358.93389892578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 359.00506591796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 358.7237243652344
############ Running episode number: 907  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.8565979003906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 359.0624084472656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 359.18536376953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 358.7894287109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 359.0205383300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 358.9295349121094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 358.97930908203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 358.9864501953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 358.8590393066406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 358.8934020996094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 358.86004638671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 358.82159423828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 358.8167724609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.8372497558594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 358.9732971191406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 358.94537353515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 358.89373779296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 358.9504089355469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 358.8638916015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 359.2284240722656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 358.8205261230469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 359.20953369140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 358.8771667480469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 358.7388000488281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 358.8530578613281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 358.7056884765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 358.815185546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 358.99609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 359.0872802734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 359.02294921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 358.9633483886719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 358.8313903808594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 359.239013671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 359.0320739746094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 358.92864990234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 358.9846496582031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 358.9558410644531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 359.17449951171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 358.7586669921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 359.1634826660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 358.5834655761719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 358.9551696777344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 358.8900146484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 359.1062927246094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 358.90277099609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 358.61962890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 358.9545593261719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 359.01336669921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 358.9100036621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 359.1553649902344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 359.1152038574219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 359.0118408203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 358.86126708984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 358.9130859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 358.8648681640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 359.04608154296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 358.8377380371094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 358.9693298339844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 359.0111389160156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 359.0611267089844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 358.8344421386719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 358.8484802246094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 358.967529296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 358.884033203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 358.9176330566406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 358.898193359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 358.87530517578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 358.99407958984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 358.9493713378906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 358.8778381347656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 359.0885925292969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 358.9845275878906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 359.01312255859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 359.0325622558594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 358.9669189453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 358.84002685546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 358.8475341796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 358.8681945800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.9989929199219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 358.81182861328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 358.8336486816406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 358.8702697753906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 358.8900451660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 359.1844177246094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 358.73590087890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.91131591796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 358.9262390136719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.8338317871094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.9745178222656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 359.0011901855469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 359.0765686035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 358.9478759765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 358.9733581542969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 358.9558410644531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 359.0484924316406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 358.9747314453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 358.83306884765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 358.946533203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 358.8751525878906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 359.0570983886719
############ Running episode number: 908  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 359.0124816894531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 359.0323486328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 358.9617004394531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 358.9410095214844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 358.9049072265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 359.03094482421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 359.04547119140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 359.1270446777344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 359.11651611328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 359.0053405761719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 358.93017578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 358.8719482421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 358.8456115722656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.816650390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 358.970947265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 358.93115234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 359.15972900390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 358.7043151855469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 358.9503479003906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 358.8451843261719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 359.1364440917969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 359.00201416015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 358.93548583984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 358.9739990234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 359.0567626953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 358.95843505859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 358.77178955078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 359.00958251953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 358.728759765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 358.9106140136719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 358.9969482421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 358.9193420410156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 358.9720153808594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 358.9599304199219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 358.916259765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 358.9986267089844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 358.7245178222656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.753173828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 358.98358154296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 358.81903076171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 358.7469482421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 358.96295166015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 358.922119140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 358.9533386230469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 359.0265197753906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 358.95703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 358.7922058105469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 359.0180969238281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 359.0706481933594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 358.8730773925781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 359.0404968261719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 359.0212097167969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 359.04266357421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 358.9505920410156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 358.8520202636719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 359.0263671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 359.1078186035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 359.0141906738281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 358.800048828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 358.9508972167969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 359.0529479980469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 358.6772766113281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 358.96832275390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 359.2398376464844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 358.90240478515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 358.94366455078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 358.94390869140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 358.87860107421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 358.7546081542969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 359.03350830078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 359.0903015136719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 358.9397277832031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 359.1091613769531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 359.06158447265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 358.83465576171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 358.67510986328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 358.9752502441406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 359.04681396484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.8707275390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 358.8005676269531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 359.1630554199219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 358.9571228027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 358.95166015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 359.3387145996094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 358.8948974609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.80267333984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 359.17266845703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 359.1575927734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.7652587890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 359.02276611328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 358.901123046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 359.0013732910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 359.12835693359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 359.0182189941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 358.7347412109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 359.0130310058594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 359.1969299316406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 358.9418029785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 358.7981262207031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 358.8647155761719
############ Running episode number: 909  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.79803466796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 359.12554931640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 359.07501220703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 359.1123352050781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 358.7847595214844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 358.8831481933594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 359.0593566894531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 358.9744873046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 358.831298828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 359.0153503417969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 358.87451171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 359.1640930175781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 358.9635925292969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.7855529785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 358.95404052734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 358.91680908203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 358.9664611816406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 358.99169921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 358.93939208984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 359.0358581542969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 359.1221008300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 358.99945068359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 358.78961181640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 359.25836181640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 358.7553405761719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 358.69476318359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 358.90716552734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 359.0204162597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 358.9481201171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 358.99591064453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 359.12396240234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 358.96136474609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 358.9953308105469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 359.1346435546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 358.9378967285156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 358.98675537109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 359.06317138671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.81622314453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 359.1314697265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 359.0948181152344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 359.0322265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 358.993408203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 359.0988464355469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 358.843994140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 358.9514465332031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 358.975830078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 358.9006042480469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 359.0810546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 358.8439025878906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 358.8604736328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 358.8684387207031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 359.0477294921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 359.08026123046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 359.0057678222656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 358.9174499511719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 358.93365478515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 359.13995361328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 358.96002197265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 359.037841796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 359.0591735839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 358.9925842285156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 358.9949951171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 358.8748474121094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 358.97698974609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 358.8236389160156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 359.1623229980469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 359.0728454589844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 359.0971374511719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 358.9503173828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 359.01971435546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 358.9814147949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 359.1925354003906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 359.046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 359.0268249511719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 358.9094543457031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 358.9095458984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 358.94622802734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 358.994140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 359.1470031738281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 359.0457458496094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 358.9997863769531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 359.1714172363281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 359.03277587890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 358.9652404785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 358.93438720703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.9454345703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 358.7712097167969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.6253967285156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 359.0977478027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 358.94061279296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 358.83258056640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 358.92889404296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 359.0941162109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 359.0584716796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 358.880126953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 358.850830078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 358.97705078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 359.12884521484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 359.1263732910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 358.91876220703125
############ Running episode number: 910  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 359.0685119628906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 358.8564758300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 359.02093505859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 358.8947448730469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 359.0177001953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 359.0713806152344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 358.9786682128906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 359.0990295410156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 358.9237060546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 359.0516662597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 359.1221008300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 359.048095703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 358.676513671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.7344970703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 359.10546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 358.9443664550781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 358.8640441894531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 358.9467468261719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 359.01165771484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 358.9761047363281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 359.20550537109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 358.87933349609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 359.0185241699219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 358.9297790527344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 358.7848205566406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 358.8910217285156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 358.9422302246094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 359.0476989746094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 358.8741455078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 358.9157409667969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 359.0966796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 359.0782165527344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 358.9493713378906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 358.8738098144531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 359.1512145996094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 358.8085632324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 358.9778137207031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 359.1292724609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 358.9421081542969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 358.9170837402344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 358.8301086425781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 358.9198303222656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 358.9209899902344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 359.03125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 358.900634765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 358.634033203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 359.1075744628906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 359.0188293457031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 359.0628967285156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 358.8407287597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 358.9902648925781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 359.02984619140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 358.9523010253906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 359.015380859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 359.0738220214844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 358.93231201171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 359.183837890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 358.95086669921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 358.8097229003906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 358.9956359863281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 359.0314025878906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 359.1085510253906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 359.3096923828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 358.99517822265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 358.7793273925781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 359.02044677734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 359.0975341796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 358.9306640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 358.9303283691406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 359.1494445800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 358.873291015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 358.7835998535156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 359.0205078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 358.9788818359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 359.0351257324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 359.0281066894531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 359.00140380859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 358.80120849609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 359.10589599609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 358.9125061035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 359.20721435546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 358.8863830566406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 359.1391296386719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 359.0855712890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 358.925048828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.9510192871094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 358.8499755859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 359.03472900390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.98480224609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 358.80767822265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 358.9455871582031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 358.7974548339844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 358.9770202636719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 358.9422302246094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 359.04571533203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 358.9760437011719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 358.9273376464844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 358.9290771484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 359.00457763671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 359.07470703125
############ Running episode number: 911  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.9347839355469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 358.9634094238281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 358.9060363769531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 359.07733154296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 359.04608154296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 359.0931091308594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 359.0403137207031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 359.0746154785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 359.0575256347656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 358.8340148925781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 358.8836975097656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 359.0509033203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 358.8795166015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.72564697265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 358.7655334472656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 358.9881286621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 359.113525390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 359.01885986328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 358.9775695800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 359.0231018066406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 358.74334716796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 358.9881286621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 358.98345947265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 358.9270935058594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 359.1563720703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 359.04052734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 358.6184387207031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 359.1365661621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 359.105712890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 358.96466064453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 359.07281494140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 358.8714599609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 358.8344421386719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 359.0995178222656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 358.9448547363281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 358.9600830078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 358.9160461425781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.95562744140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 358.8939208984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 358.7469482421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 359.04339599609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 359.2161865234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 359.0749816894531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 359.0299072265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 359.0830078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 359.1336669921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 359.04449462890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 358.8770446777344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 359.2182312011719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 358.8840637207031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 358.8562927246094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 359.0429382324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 359.03070068359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 358.9254150390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 359.0539855957031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 358.87060546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 358.96795654296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 358.68511962890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 359.0457763671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 358.8276672363281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 359.05413818359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 358.85040283203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 358.7928771972656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 359.0011291503906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 358.9668884277344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 359.0288391113281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 358.8636169433594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 358.991943359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 358.8168640136719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 358.9767150878906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 359.0428771972656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 359.1277160644531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 358.9815979003906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 358.8143005371094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 358.8208312988281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 358.84259033203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 358.8150939941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 358.8452453613281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.9800720214844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 359.19122314453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 358.9657897949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 358.896728515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 358.7552185058594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 359.1083984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 359.11627197265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 359.115478515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 358.9693298339844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 359.01654052734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 359.11273193359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 358.9535827636719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 358.88690185546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 358.8277587890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 359.0154113769531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 358.9998779296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 358.7701110839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 359.09906005859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 358.9497985839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 359.1003723144531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 359.06695556640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 358.9839782714844
############ Running episode number: 912  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 359.02142333984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 358.9610900878906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 358.8943786621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 359.0147705078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 358.8623962402344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 359.0860290527344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 358.98236083984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 358.7325439453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 359.1473388671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 358.9853210449219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 358.96856689453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 358.861328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 358.9799499511719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.9720153808594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 358.7031555175781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 358.8798522949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 358.83892822265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 358.904296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 358.9578857421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 359.2427062988281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 359.1722106933594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 358.78118896484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 358.9980163574219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 358.9670104980469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 358.98486328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 358.9054260253906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 358.99163818359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 358.9097900390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 358.83935546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 358.8247375488281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 359.062744140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 358.7210693359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 358.74578857421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 358.9043273925781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 359.0083312988281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 359.2102966308594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 359.0244445800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.913330078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 359.1809997558594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 359.13763427734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 359.1711120605469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 358.92962646484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 358.89886474609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 358.73809814453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 358.94793701171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 359.0751647949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 358.7702941894531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 359.046142578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 359.119873046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 359.0519714355469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 358.900390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 358.9449768066406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 358.87152099609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 359.06982421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 358.9378356933594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 358.6977844238281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 358.85198974609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 359.0393981933594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 358.95361328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 358.9353332519531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 358.8503723144531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 358.9573669433594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 358.90240478515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 358.95721435546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 358.995849609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 358.8941955566406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 358.8434753417969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 359.06219482421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 358.9967041015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 358.973876953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 359.0594177246094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 359.01947021484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 358.8650817871094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 359.0802917480469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 358.95440673828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 358.89923095703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 358.93634033203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 358.8797302246094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.8687438964844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 358.88018798828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 358.67425537109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 359.2248840332031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 359.0249938964844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 358.7631530761719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 359.21551513671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.8965759277344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 358.85504150390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.9419250488281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.99749755859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 358.88287353515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 359.0140075683594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 359.0209655761719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 358.7742919921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 359.1101989746094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 359.0107727050781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 359.2933349609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 358.98687744140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 358.8543395996094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 358.83203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 358.8601989746094
############ Running episode number: 913  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.98797607421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 358.94097900390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 359.04876708984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 359.1074523925781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 358.90472412109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 358.8997497558594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 358.99432373046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 359.10040283203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 359.1274719238281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 358.7929992675781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 358.74932861328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 359.05291748046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 358.9736633300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.7177734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 359.1254577636719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 358.99530029296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 358.8242492675781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 359.19342041015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 358.7900085449219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 359.1185302734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 359.02679443359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 358.86419677734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 358.71478271484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 359.1854553222656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 359.0150146484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 359.1744689941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 359.0271301269531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 359.00433349609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 358.9646911621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 358.8616638183594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 358.884765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 358.89910888671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 359.00201416015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 359.0293884277344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 359.0593566894531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 358.86956787109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 359.06298828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.88348388671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 359.0207214355469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 358.93035888671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 358.951904296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 359.0275573730469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 358.8764343261719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 359.11920166015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 358.95166015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 358.83807373046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 358.9530944824219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 359.0195617675781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 358.9561462402344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 359.02740478515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 359.2313537597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 358.79058837890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 358.86212158203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 358.829345703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 358.9537353515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 359.00799560546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 358.93621826171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 358.8252258300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 358.971435546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 358.8826904296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 358.8779296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 358.8304138183594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 358.8415832519531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 358.85552978515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 358.8385925292969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 358.9816589355469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 358.79949951171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 359.065673828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 359.1595458984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 359.1116027832031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 358.8655700683594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 359.0599060058594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 359.0709228515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 359.0155029296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 358.99053955078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 359.0915222167969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 358.7471923828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 358.9346008300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 359.2074279785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 358.9656982421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 358.9957580566406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 358.87255859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 359.2010803222656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 358.8936462402344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 359.21038818359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.8603210449219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 358.77593994140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 359.0218811035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.8875427246094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 358.7967529296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 358.55499267578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 359.0253601074219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 358.91375732421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 358.9771423339844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 359.01336669921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 358.8010559082031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 358.7870178222656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 359.0832214355469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 359.0069580078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 359.0995788574219
############ Running episode number: 914  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 359.0387878417969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 358.7808837890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 358.9880676269531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 358.9326171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 358.9231262207031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 359.0203552246094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 358.9407043457031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 359.0682067871094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 358.7513427734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 359.0061950683594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 358.8871765136719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 358.88555908203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 359.01971435546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.97222900390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 358.887451171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 359.1121826171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 358.7540283203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 358.97161865234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 359.0426330566406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 358.89453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 358.924560546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 359.0325927734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 358.99700927734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 358.7691650390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 358.94281005859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 358.89910888671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 358.9263916015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 359.06036376953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 358.97503662109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 359.0873107910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 358.7335205078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 358.8534240722656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 358.99560546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 358.8472595214844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 359.2217102050781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 358.6665954589844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 359.091552734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.9278564453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 358.8661804199219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 359.0701904296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 358.88665771484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 359.0595703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 359.1085510253906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 359.0482482910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 359.1659851074219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 358.9507141113281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 359.0704040527344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 358.9774169921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 359.1493835449219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 359.05987548828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 358.992431640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 358.7425537109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 358.91107177734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 358.8672790527344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 359.0599670410156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 358.9184265136719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 359.05291748046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 358.7603759765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 358.9515380859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 359.2521057128906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 359.07098388671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 358.6897888183594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 358.8971862792969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 359.0205078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 359.08447265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 358.6855773925781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 358.8619384765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 358.9128723144531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 358.994140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 359.1170654296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 358.8405456542969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 358.9602355957031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 358.9590148925781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 358.9347229003906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 358.8329162597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 358.7611999511719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 358.9720153808594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 358.6628112792969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 359.0110168457031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 358.8691711425781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 358.9001159667969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 358.8072509765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 359.1653747558594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 358.64581298828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 359.1643371582031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 359.0376892089844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 359.1729431152344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 359.0268859863281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.9658203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 359.0716857910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 358.9380798339844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 358.982666015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 359.1639709472656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 358.8658142089844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 358.7633056640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 358.9649658203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 358.8747863769531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 358.949462890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 359.0584411621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 358.6993713378906
############ Running episode number: 915  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 359.1471252441406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 358.959228515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 358.8332214355469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 358.93792724609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 358.6385192871094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 358.88739013671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 358.97540283203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 359.04754638671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 359.0061340332031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 359.06817626953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 359.22161865234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 359.068359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 358.8523864746094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 359.13525390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 359.1050720214844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 358.87908935546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 358.9443664550781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 358.97607421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 358.9515380859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 358.82745361328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 359.02691650390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 358.74072265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 358.8634338378906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 358.684814453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 358.9057312011719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 358.83416748046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 359.1042785644531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 359.15545654296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 358.83428955078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 359.043701171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 359.0186767578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 358.8558349609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 358.9969177246094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 359.1361999511719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 359.0592041015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 359.2882080078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 358.7718811035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.9507751464844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 359.186767578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 358.8348693847656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 358.7465515136719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 359.02703857421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 359.0098571777344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 359.053466796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 358.9396057128906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 359.0011291503906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 359.1732482910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 358.78729248046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 358.8658142089844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 358.8581237792969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 358.84869384765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 358.9949645996094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 358.9107971191406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 358.87176513671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 358.8049621582031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 359.0060729980469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 359.052001953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 359.0796813964844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 359.05438232421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 359.0392761230469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 358.9754638671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 358.992919921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 358.86419677734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 358.90179443359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 358.8681640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 358.9363098144531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 359.0119934082031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 359.0699462890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 358.845458984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 359.03009033203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 358.8464050292969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 358.7984619140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 358.9983215332031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 359.16680908203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 358.9315185546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 358.90087890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 359.0135803222656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 358.9638671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.8844299316406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 359.0464782714844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 358.9471130371094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 358.9571533203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 358.8634948730469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 358.9791259765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 358.9844055175781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 359.11273193359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 358.8807373046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.99456787109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.8932189941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 358.99554443359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 358.9556579589844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 359.0152282714844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 358.8913269042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 359.24102783203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 358.7555847167969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 358.91357421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 359.23565673828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 359.0000305175781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 358.90191650390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 359.01544189453125
############ Running episode number: 916  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 359.1691589355469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 358.9413146972656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 358.8016662597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 359.12567138671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 359.08502197265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 358.9415283203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 359.09893798828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 358.82708740234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 358.9320068359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 359.0329895019531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 358.9110107421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 359.02496337890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 358.8984680175781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.6543884277344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 358.99871826171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 359.06524658203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 359.03985595703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 358.9473876953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 359.1449890136719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 358.8220520019531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 359.04473876953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 359.14068603515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 359.035888671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 358.8723449707031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 359.0484924316406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 358.7807312011719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 358.8656311035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 358.8860778808594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 359.053466796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 358.7858581542969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 358.919189453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 358.8655700683594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 359.1162414550781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 359.1640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 358.9123229980469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 358.9547424316406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 359.0351257324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 359.0389099121094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 359.04522705078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 358.7952575683594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 359.09527587890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 358.9280700683594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 358.9408264160156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 358.8619384765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 359.04833984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 358.7762451171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 359.03814697265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 358.831298828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 358.8243408203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 359.0200500488281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 359.0205383300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 358.78802490234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 359.0707702636719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 358.95941162109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 359.009033203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 358.90728759765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 359.1075439453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 359.243408203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 359.1302185058594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 359.0741882324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 358.76055908203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 358.9092712402344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 358.93701171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 358.8090515136719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 359.0509948730469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 358.9010009765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 359.1758117675781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 359.04180908203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 359.1551513671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 358.7841796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 358.93951416015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 358.8166198730469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 358.9932556152344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 358.6574401855469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 359.1175537109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 359.0806884765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 358.78765869140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 358.9233703613281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 359.0561218261719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 358.7487487792969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 359.0091247558594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 359.15948486328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 359.0593566894531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 359.0697021484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 358.87200927734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 359.0283203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 358.9376525878906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 359.0787658691406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 359.06036376953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 359.093994140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 359.0416259765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 358.890869140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 358.7710876464844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 359.29833984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 359.0726623535156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 358.8519287109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 358.9230041503906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 359.09051513671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 359.1562805175781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 359.18218994140625
############ Running episode number: 917  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 359.17926025390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 358.984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 358.9981689453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 358.88348388671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 358.79034423828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 358.81927490234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 359.0707702636719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 359.0200500488281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 359.0591125488281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 358.97247314453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 359.03094482421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 359.0885009765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 359.1248779296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.996826171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 358.828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 358.8980407714844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 358.9135437011719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 359.05218505859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 359.0057067871094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 359.1166687011719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 358.7646179199219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 359.04962158203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 358.9285583496094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 358.92230224609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 359.2133483886719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 359.0397644042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 359.1239318847656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 358.7767639160156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 358.7716369628906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 358.640869140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 359.0639343261719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 359.1006774902344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 359.1341247558594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 358.8448181152344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 359.0643005371094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 358.9796447753906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 359.103271484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 359.0055236816406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 358.9849548339844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 359.01336669921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 358.869140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 358.8524169921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 358.9530029296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 359.106201171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 358.815673828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 359.03228759765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 358.75628662109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 358.880126953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 358.9826354980469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 359.1039733886719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 358.9468688964844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 359.0246276855469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 358.8568420410156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 359.2033386230469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 358.8649597167969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 358.9378662109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 359.0350036621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 359.1010437011719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 358.7633972167969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 359.11175537109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 359.0453186035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 359.1427307128906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 358.9428405761719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 358.8534240722656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 359.01995849609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 358.83062744140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 358.93939208984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 358.7762145996094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 359.057861328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 358.9493408203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 359.10394287109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 359.0543212890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 358.8089904785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 358.9649963378906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 358.8616943359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 358.77984619140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 359.04864501953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 359.0592041015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 359.0172119140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 358.6963806152344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 358.8666076660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 359.00537109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 359.0660095214844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 358.8917541503906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 358.6391296386719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 359.0240783691406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 359.074951171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.8429870605469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.7586975097656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 359.0345153808594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 359.1126403808594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 359.2251281738281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 359.2449645996094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 359.0073547363281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 359.22613525390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 359.0341491699219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 358.9365539550781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 359.0455627441406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 358.7947998046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 358.7851257324219
############ Running episode number: 918  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.9275207519531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 358.92779541015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 358.98333740234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 358.92779541015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 358.87640380859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 359.0352478027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 358.8817138671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 359.0140686035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 359.0203857421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 358.9679260253906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 358.9837341308594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 359.09283447265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 358.993408203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 359.0234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 359.0544738769531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 358.94622802734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 358.9668884277344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 359.0858459472656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 358.7270812988281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 358.94842529296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 358.9583740234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 359.1117248535156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 358.8589782714844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 359.0086669921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 358.75579833984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 358.910888671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 358.88116455078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 358.8526916503906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 358.9089050292969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 359.02679443359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 358.8140563964844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 359.1396484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 358.8948059082031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 359.09088134765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 358.8709411621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 359.0264587402344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 358.96881103515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.93719482421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 358.9470520019531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 358.99273681640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 358.6492919921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 358.80615234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 358.9021911621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 359.046142578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 358.75604248046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 358.6847839355469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 359.0452575683594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 359.0079650878906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 358.94818115234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 358.9684753417969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 358.9494323730469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 359.016357421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 358.9935607910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 359.0636291503906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 359.03826904296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 358.9115905761719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 358.7916564941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 358.9611511230469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 358.9940490722656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 359.0026550292969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 359.21026611328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 358.6748962402344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 358.9708251953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 359.1306457519531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 358.85137939453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 358.94970703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 358.9007263183594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 359.0137634277344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 358.96209716796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 358.9231262207031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 358.9382629394531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 358.919921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 359.0654602050781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 359.0440673828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 358.9704284667969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 359.0103454589844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 358.9318542480469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 358.8516540527344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.8405456542969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 358.869873046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 358.97314453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 358.9761657714844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 359.1803894042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 358.7964172363281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 359.06524658203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 359.0016174316406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 358.8883361816406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.8726501464844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 359.04022216796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 358.9316711425781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 358.91986083984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 359.17401123046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 359.152587890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 358.8695068359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 358.9757995605469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 358.8294982910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 359.0127258300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 358.8346252441406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 358.6131896972656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 359.1513977050781
############ Running episode number: 919  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.6357421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 359.0334167480469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 358.97174072265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 358.882568359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 358.876953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 359.056640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 358.83660888671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 358.9769287109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 358.778564453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 359.0413818359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 358.9481506347656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 358.888916015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 358.9778747558594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.8033752441406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 359.2257080078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 358.9609069824219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 359.0571594238281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 359.12310791015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 358.8157958984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 358.9525146484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 358.8873291015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 358.8426818847656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 359.11541748046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 358.94085693359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 358.94049072265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 359.0887451171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 359.00250244140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 359.06573486328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 359.0599060058594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 358.898681640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 358.8909606933594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 358.90362548828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 358.9458312988281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 359.1717834472656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 359.0627746582031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 358.9692077636719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 358.8121032714844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.67425537109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 359.2286682128906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 358.9765930175781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 359.07940673828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 359.07733154296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 358.8499755859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 359.03326416015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 359.08331298828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 359.069580078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 359.00701904296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 359.0556335449219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 359.1798400878906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 359.1078186035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 358.9244689941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 358.9232482910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 358.9596862792969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 358.9492492675781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 359.0207824707031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 359.0780944824219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 358.91436767578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 358.7096862792969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 358.7918701171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 358.9615478515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 359.0274353027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 359.1526184082031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 358.8860168457031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 359.01544189453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 359.00665283203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 358.83843994140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 358.90936279296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 358.9925231933594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 358.90557861328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 358.9964599609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 359.19830322265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 358.9989929199219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 358.81170654296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 359.00421142578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 358.8658447265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 358.9385070800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 359.1539306640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 359.1582336425781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.9986267089844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 359.010986328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 359.01068115234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 359.0417175292969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 359.0493469238281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 359.11083984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 358.904541015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.9678955078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 358.97625732421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.8877868652344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.9232177734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 358.9786376953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 358.8685302734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 358.9820251464844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 358.8727722167969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 359.0487976074219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 359.0092468261719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 358.89849853515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 359.0086669921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 358.9996337890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 358.9112548828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 358.75543212890625
############ Running episode number: 920  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.8436279296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 358.90618896484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 358.73974609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 359.0898742675781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 358.7947998046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 358.97882080078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 358.9069519042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 358.87939453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 359.0558776855469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 358.90728759765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 359.0863037109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 358.9260559082031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 359.0432434082031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 359.0380554199219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 358.6659240722656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 359.1385803222656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 358.94012451171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 358.97332763671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 358.929443359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 358.8820495605469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 359.23236083984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 358.9381103515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 359.1123962402344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 358.8802490234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 358.8476867675781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 358.8418884277344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 358.9173278808594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 358.7791442871094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 359.00103759765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 358.93475341796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 359.0430603027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 358.8592529296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 358.6878967285156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 358.9367980957031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 358.9673767089844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 359.0157775878906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 359.06427001953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 359.0043640136719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 358.9357604980469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 358.9410705566406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 358.92919921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 359.135009765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 358.91741943359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 358.8534240722656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 358.9237365722656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 359.3125305175781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 358.757568359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 358.86297607421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 358.87005615234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 359.1353759765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 359.0849914550781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 358.9634704589844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 358.9603271484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 358.8504943847656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 359.24810791015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 359.00579833984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 359.1223449707031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 358.9424133300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 359.1716613769531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 358.9737548828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 359.0002746582031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 358.9435119628906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 358.9001159667969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 358.6872863769531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 359.03704833984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 359.0236511230469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 359.11077880859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 358.8656311035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 358.9812927246094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 359.00311279296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 358.8690185546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 359.0941162109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 358.71405029296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 359.1661376953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 358.701171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 359.0525207519531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 359.0165710449219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 358.92041015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.9812927246094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 358.71112060546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 358.7891845703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 358.9440002441406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 358.7511291503906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 359.0150451660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 359.0057678222656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 359.04083251953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 359.1486511230469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.8505859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 359.0025939941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 358.9853820800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 359.0028076171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 358.8915100097656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 358.9118347167969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 358.623046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 358.98480224609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 358.8794250488281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 358.8267822265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 359.02801513671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 358.9114685058594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 359.0357971191406
############ Running episode number: 921  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.9774475097656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 359.0001220703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 358.9268493652344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 359.13507080078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 358.88775634765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 358.9652404785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 359.0063171386719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 359.0433654785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 358.65997314453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 358.986572265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 358.7331237792969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 359.024169921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 359.0471496582031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.7485046386719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 358.53515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 359.12896728515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 359.0916442871094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 359.0920715332031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 359.13653564453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 359.02581787109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 358.8916931152344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 359.025146484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 359.115234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 359.3580627441406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 359.0264892578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 359.1897888183594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 358.9562072753906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 358.93115234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 359.0762634277344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 359.0770263671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 358.9056396484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 358.9404296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 358.88018798828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 358.92022705078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 359.0543212890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 358.8321838378906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 358.9278259277344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.741943359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 359.14459228515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 358.9784851074219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 359.078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 359.04449462890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 358.99176025390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 359.03387451171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 358.991943359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 358.9927673339844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 358.9687805175781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 358.9636535644531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 358.92376708984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 358.99560546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 358.8292541503906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 359.11236572265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 358.9685363769531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 358.9678955078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 358.87957763671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 359.0358581542969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 358.8389587402344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 358.92236328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 358.9358825683594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 358.88995361328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 358.854736328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 358.8963928222656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 358.9198913574219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 358.9255676269531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 358.9558410644531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 359.0447082519531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 358.59521484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 358.9650573730469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 358.9848937988281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 359.1224060058594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 359.0990905761719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 359.00347900390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 359.0987854003906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 358.8477478027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 358.8224182128906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 358.860107421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 358.9630126953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 358.89398193359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 359.0548400878906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 359.0593566894531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 359.0980529785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 358.9294738769531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 359.2633972167969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 359.0823669433594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 358.7447204589844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.8892517089844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 358.88824462890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.9228210449219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.8287658691406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 358.9720458984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 359.0753173828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 358.97222900390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 358.888427734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 359.0600280761719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 358.9096374511719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 359.0208740234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 358.7034912109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 358.9102783203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 358.8443603515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 358.73785400390625
############ Running episode number: 922  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.9620056152344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 358.875732421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 358.96417236328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 359.0751953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 358.92901611328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 359.007080078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 358.9793395996094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 358.972900390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 358.9612121582031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 358.9083251953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 359.1740417480469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 358.9335021972656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 359.0414123535156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 359.1471252441406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 358.8590393066406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 359.2413635253906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 359.156982421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 359.0762939453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 358.85791015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 358.98016357421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 359.1888732910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 358.8979797363281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 358.9330749511719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 359.0449523925781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 358.8758544921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 358.948974609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 358.92279052734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 358.6858825683594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 359.0384216308594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 359.13482666015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 359.0093688964844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 358.9775390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 358.9147033691406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 359.0010070800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 358.7916564941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 358.9942321777344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 358.9578857421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 359.2982482910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 358.9779968261719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 358.90325927734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 359.1391296386719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 359.1706237792969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 359.1830749511719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 359.0552062988281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 358.6352233886719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 359.0181884765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 358.7628479003906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 359.21600341796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 359.0845947265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 358.9698486328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 359.001708984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 359.05157470703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 358.9389953613281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 359.0339660644531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 358.8759460449219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 359.0122985839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 358.7113342285156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 359.01580810546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 358.82366943359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 358.7481384277344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 358.7894287109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 358.9691467285156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 359.09075927734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 359.176025390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 358.7895812988281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 359.0164489746094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 358.9056396484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 359.0193786621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 359.05120849609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 358.77154541015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 358.98504638671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 359.1297912597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 358.86285400390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 358.9202575683594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 359.0881042480469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 358.9207763671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 359.02880859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 358.8709411621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 359.12603759765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 359.0431213378906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 359.05780029296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 359.34088134765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 358.71063232421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 358.8722839355469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 358.8864440917969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 359.0743103027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 358.8293762207031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.86181640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 359.3330993652344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 359.0608215332031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 358.949462890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 359.09869384765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 358.9341735839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 359.0865783691406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 359.0334777832031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 358.9244689941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 359.1279296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 359.07000732421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 358.8022155761719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 358.7263488769531
############ Running episode number: 923  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.95294189453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 359.1869201660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 358.9101257324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 359.0840759277344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 358.97406005859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 359.0556945800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 358.9537048339844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 358.9460754394531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 358.8645935058594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 359.0227355957031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 359.0738830566406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 359.0159606933594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 358.97308349609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.67559814453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 359.07806396484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 358.9031677246094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 358.85009765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 359.0128479003906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 359.12469482421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 359.08001708984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 358.8478088378906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 358.71258544921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 359.09112548828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 358.9853210449219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 359.07281494140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 358.9018249511719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 359.0710754394531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 359.0456237792969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 359.0593566894531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 358.9150085449219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 358.7822570800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 359.13568115234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 358.8438415527344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 358.9798889160156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 359.11761474609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 358.7434387207031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 358.89837646484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.8645324707031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 359.1512451171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 358.9407043457031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 358.8804626464844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 359.04534912109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 358.856689453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 358.9783630371094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 358.8883056640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 358.8244323730469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 359.1240234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 359.0304260253906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 358.64776611328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 359.3199768066406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 358.8552551269531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 358.7711181640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 358.76513671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 359.1340637207031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 358.92266845703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 358.7331848144531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 358.8992919921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 359.0122985839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 359.22894287109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 358.9247741699219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 358.7966003417969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 358.93646240234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 358.9308776855469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 359.0862731933594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 358.85772705078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 358.9314270019531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 359.14959716796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 358.941650390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 359.1482238769531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 359.10791015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 358.82440185546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 359.1142272949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 358.9751892089844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 358.89056396484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 358.9209899902344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 358.7834777832031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 359.0477294921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 359.02789306640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.86688232421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 358.9006652832031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 359.0892028808594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 358.7500915527344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 359.0771789550781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 358.9474792480469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 359.1544494628906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 359.2743225097656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 358.818115234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.80718994140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.8558654785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 359.0035705566406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 358.95233154296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 359.1165466308594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 358.9964904785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 358.92333984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 358.89044189453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 358.84375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 359.0154724121094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 358.957275390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 358.9621887207031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 358.8345031738281
############ Running episode number: 924  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.498291015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 359.1443176269531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 359.01519775390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 359.146484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 358.8364562988281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 358.9310302734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 358.947509765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 358.8200378417969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 359.18804931640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 358.9554748535156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 358.99725341796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 358.9798889160156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 358.6371765136719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.8091735839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 358.91595458984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 359.0119934082031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 359.1649475097656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 358.9424743652344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 358.8665771484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 359.22265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 358.7797546386719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 358.8467712402344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 358.943359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 359.04541015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 358.8660583496094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 358.8526306152344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 358.9418029785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 358.9400329589844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 358.7215881347656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 358.9238586425781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 358.9968566894531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 358.9912414550781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 359.1650695800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 358.95220947265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 358.9667053222656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 358.8927001953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 359.0177307128906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.90771484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 359.0638732910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 358.935302734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 358.8543395996094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 359.1039123535156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 359.0763244628906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 359.0415954589844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 358.9656066894531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 359.0220642089844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 358.91583251953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 358.9736633300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 358.81353759765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 359.0074462890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 359.1201171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 359.1224670410156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 358.9001770019531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 358.92486572265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 358.9622497558594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 359.1468505859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 358.8624267578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 358.9371643066406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 359.0108947753906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 359.1531066894531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 359.0365905761719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 358.64556884765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 358.7347412109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 358.9423522949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 358.64276123046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 358.9232177734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 358.9754943847656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 358.7786560058594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 358.9167785644531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 359.0267333984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 359.14471435546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 359.100341796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 358.897705078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 359.08148193359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 359.043212890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 358.8495178222656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 358.70867919921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 358.7272644042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.856201171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 358.9194641113281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 358.79498291015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 358.8424072265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 359.0340881347656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 358.7895812988281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 358.9643859863281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.73687744140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 358.887939453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 359.26123046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.8648376464844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 358.791259765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 359.246337890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 358.913818359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 359.03759765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 359.0896301269531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 359.261962890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 358.9232177734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 358.8246765136719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 359.08062744140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 359.0001525878906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 358.96246337890625
############ Running episode number: 925  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.9196472167969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 359.1143493652344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 358.8089904785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 358.94879150390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 359.0359191894531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 358.95465087890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 358.901123046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 359.04156494140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 358.9715270996094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 359.0094299316406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 358.8506164550781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 358.87908935546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 358.9803466796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 359.1217956542969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 359.0032043457031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 358.8054504394531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 358.8836364746094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 358.8748779296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 358.8102111816406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 359.0442199707031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 358.8124694824219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 358.8179626464844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 358.7899169921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 358.74688720703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 359.1304626464844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 359.02166748046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 359.0312805175781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 359.2248840332031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 359.08648681640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 358.93121337890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 359.17047119140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 358.9092102050781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 358.9209289550781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 358.7129211425781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 359.1753845214844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 358.72052001953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 359.169189453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.9134216308594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 359.0904846191406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 358.9531555175781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 358.7708435058594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 358.839111328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 358.8243103027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 359.0567932128906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 358.89739990234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 358.9855041503906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 359.0863037109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 359.0360107421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 359.07000732421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 358.8603515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 358.8051452636719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 359.26556396484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 359.1357727050781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 359.0005798339844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 359.20416259765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 358.852294921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 358.7799377441406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 358.9964904785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 359.0745849609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 358.8554382324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 358.952392578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 358.9478454589844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 358.7796936035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 359.01043701171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 359.2151184082031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 358.6638488769531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 359.0775451660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 359.02996826171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 358.76007080078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 358.96453857421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 358.8589782714844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 359.0844421386719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 359.05999755859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 358.8534240722656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 358.90118408203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 358.95196533203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 358.93218994140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 358.8264465332031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 359.03192138671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 359.0299072265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 358.782958984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 358.9903564453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 359.00213623046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 358.8494567871094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 358.87750244140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 359.1052551269531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 358.888671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 359.0617370605469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.8106384277344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 359.0418395996094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 359.09307861328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 358.9311218261719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 358.8096008300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 358.76171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 358.6866760253906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 358.7528076171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 359.0208435058594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 358.9676513671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 359.00848388671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 358.97198486328125
############ Running episode number: 926  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.85345458984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 358.83648681640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 358.7305908203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 359.0242614746094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 358.9119567871094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 358.8056335449219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 359.0884704589844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 358.9517822265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 359.0660400390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 358.7431640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 358.8798522949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 358.9364013671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 358.99700927734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.8434753417969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 358.7668762207031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 358.9710388183594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 358.9164733886719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 359.032470703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 359.0340270996094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 358.96844482421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 358.87664794921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 358.9414367675781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 359.0708923339844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 359.06463623046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 359.0137939453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 359.0614013671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 358.9339294433594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 359.0419006347656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 358.9291687011719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 359.3039245605469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 358.9733581542969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 358.78900146484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 359.0564270019531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 359.2825012207031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 359.0219421386719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 358.975341796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 358.82720947265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.937255859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 358.9158630371094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 358.92279052734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 358.88031005859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 358.8874816894531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 358.94244384765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 359.04388427734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 358.9136047363281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 358.723388671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 358.75506591796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 358.9404602050781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 358.9786682128906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 358.9818115234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 358.79559326171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 358.7176208496094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 358.97393798828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 359.0194396972656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 358.90008544921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 358.9325256347656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 358.9906311035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 358.95538330078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 358.96502685546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 358.7764892578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 358.7757263183594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 358.82061767578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 358.9547424316406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 358.8778076171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 359.07366943359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 358.9681701660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 359.0253601074219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 358.92108154296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 358.7044677734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 358.92608642578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 359.1492919921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 359.1155700683594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 358.9630432128906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 358.789306640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 359.0514221191406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 358.9480285644531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 358.9867248535156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 359.0536193847656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.95556640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 359.1481018066406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 358.9951171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 358.9306335449219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 358.87359619140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 359.0498352050781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 358.766845703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 359.1063537597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 358.8279724121094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 359.12896728515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.8335266113281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 358.9367370605469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 359.1584777832031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 359.0457458496094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 358.9432373046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 358.72503662109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 358.9109191894531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 358.98211669921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 358.9430236816406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 359.03375244140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 359.0331115722656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 359.02978515625
############ Running episode number: 927  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.9950866699219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 359.0168151855469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 358.8650207519531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 358.8472900390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 359.0338439941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 358.98748779296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 358.87615966796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 359.0050354003906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 358.9671630859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 358.7102355957031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 358.83111572265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 358.8919677734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 358.9431457519531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.84649658203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 358.859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 358.9227294921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 358.8244934082031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 358.8749694824219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 358.81512451171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 358.830078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 359.15216064453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 358.926025390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 358.8846130371094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 359.11932373046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 358.7979736328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 359.0918884277344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 358.8543701171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 358.8642578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 359.0867004394531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 359.2579040527344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 358.8390197753906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 359.0010681152344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 358.9764404296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 358.8710632324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 359.047607421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 358.9694519042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 359.0506591796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.7068786621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 358.9343566894531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 358.9105224609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 358.8810119628906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 358.86468505859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 358.9721374511719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 358.90838623046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 358.8880920410156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 358.9832763671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 358.8888244628906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 358.8793640136719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 359.10943603515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 358.4895324707031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 359.0551452636719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 358.87542724609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 358.7471618652344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 359.1199035644531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 358.81378173828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 358.8367614746094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 359.0816345214844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 359.00994873046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 358.9453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 359.02001953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 358.83538818359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 358.9635314941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 358.9195251464844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 359.0367126464844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 358.8234558105469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 358.9192810058594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 358.85491943359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 358.7086181640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 358.9215087890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 358.96148681640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 358.8309326171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 359.143310546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 358.8326721191406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 358.97991943359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 358.9236145019531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 358.7772216796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 358.9205627441406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 358.9931640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.8447570800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 359.038818359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 359.055908203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 358.97064208984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 358.8789367675781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 358.8697509765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 358.9617614746094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 359.06317138671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 358.8389587402344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.72662353515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.7256774902344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 358.8625793457031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 359.090087890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 358.6935119628906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 358.75311279296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 359.22808837890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 359.3116455078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 359.0884094238281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 358.6036682128906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 358.9355773925781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 358.7791442871094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 359.0797119140625
############ Running episode number: 928  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 359.0894470214844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 358.6780090332031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 358.9889831542969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 358.7763671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 359.0458068847656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 359.0449523925781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 358.98699951171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 359.0153503417969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 358.953857421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 358.9554443359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 358.86517333984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 358.74591064453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 358.92755126953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.7552795410156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 358.8731994628906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 358.8379211425781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 358.81866455078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 358.9393310546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 359.0111389160156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 358.8708801269531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 358.781005859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 358.770263671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 358.9735107421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 358.8437194824219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 359.0274353027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 358.95263671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 358.9024658203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 359.1736145019531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 358.8674621582031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 358.8584289550781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 358.7486572265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 358.8919677734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 359.1156311035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 358.967529296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 358.87127685546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 358.89166259765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 358.8687438964844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 359.1565246582031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 358.7938232421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 359.0148010253906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 359.0390930175781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 358.91766357421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 358.9691467285156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 358.9783935546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 359.08428955078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 358.9527282714844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 359.04351806640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 358.9374084472656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 358.9250183105469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 358.96002197265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 359.03558349609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 359.1410217285156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 358.9057922363281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 359.0962219238281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 358.6568298339844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 358.9944763183594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 358.8679504394531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 359.0970764160156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 358.9235534667969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 358.9155578613281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 359.0438232421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 358.7510070800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 359.09649658203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 358.6523742675781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 358.84765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 359.0222473144531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 358.77374267578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 359.089111328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 358.8384094238281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 358.93023681640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 359.0169372558594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 358.87841796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 358.9696044921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 359.10992431640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 359.132568359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 358.79833984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 358.9223937988281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 358.8323059082031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.7879638671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 358.9700622558594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 358.958251953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 358.7672119140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 358.87060546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 358.87762451171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 359.0484619140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.94451904296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 358.9759521484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 359.0751037597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 359.155517578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 359.21337890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 358.9671325683594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 358.9908752441406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 359.07635498046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 358.87158203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 358.8379821777344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 358.51934814453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 358.8682861328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 358.93994140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 358.9656677246094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 359.0337829589844
############ Running episode number: 929  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 359.1106262207031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 358.73223876953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 359.0563659667969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 358.88330078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 359.0909729003906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 358.95782470703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 359.0903015136719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 358.8563537597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 359.0047912597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 359.117431640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 358.9770202636719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 359.0941162109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 359.09393310546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.832763671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 358.9951477050781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 359.094970703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 358.7192687988281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 358.9554443359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 358.9196472167969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 359.0635986328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 358.79400634765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 359.0237731933594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 358.95361328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 358.8460998535156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 358.8133239746094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 358.9862976074219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 358.9866638183594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 359.00311279296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 359.1513977050781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 359.0626220703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 359.0541076660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 358.9438171386719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 358.8742980957031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 358.9499816894531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 358.8058166503906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 358.9322509765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 358.9584045410156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 359.2947082519531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 359.010986328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 358.93475341796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 358.8751525878906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 358.931640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 358.9105224609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 358.84735107421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 358.86358642578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 359.0125427246094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 359.04986572265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 358.8166198730469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 358.8249816894531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 358.8655090332031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 358.8435363769531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 358.9429626464844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 358.9499816894531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 358.9203186035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 358.9056701660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 358.92333984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 358.75115966796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 359.0088806152344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 358.838623046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 358.92425537109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 359.07257080078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 358.9704284667969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 358.8869934082031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 358.9657897949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 358.9218444824219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 358.9509582519531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 359.0772705078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 358.9394226074219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 359.12109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 358.9164733886719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 358.9299621582031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 359.18023681640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 358.9953308105469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 359.0650329589844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 358.8115234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 359.0161437988281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 358.80804443359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 359.1407165527344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.9964599609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 358.8150329589844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 358.9736328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 358.8368225097656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 358.97332763671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 358.91668701171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 358.9816589355469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.9700622558594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 358.9815979003906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.7989196777344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 359.10076904296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 358.9067077636719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 358.83941650390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 359.0321044921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 359.06427001953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 358.89727783203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 359.0783386230469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 358.86993408203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 358.88092041015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 359.02154541015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 359.0945739746094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 358.9349060058594
############ Running episode number: 930  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 359.1105651855469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 359.0423278808594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 359.1651611328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 358.77972412109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 358.6532287597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 359.20343017578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 358.8085632324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 359.26995849609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 358.97589111328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 358.8148498535156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 358.8596496582031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 358.7766418457031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 359.15631103515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.8714599609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 358.94390869140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 358.95770263671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 359.03399658203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 359.0191650390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 358.9950256347656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 358.8912353515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 358.8717956542969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 359.10809326171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 359.0256652832031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 358.76837158203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 359.1593933105469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 359.1571960449219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 358.9242858886719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 359.029052734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 359.0946350097656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 358.847412109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 358.94140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 359.0856018066406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 358.9380187988281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 358.91009521484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 358.72576904296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 359.0467224121094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 359.0561828613281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.99822998046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 359.0762634277344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 359.0007019042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 358.92266845703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 358.9112243652344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 358.9525146484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 358.80615234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 358.9900817871094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 358.7213439941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 359.1684265136719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 358.96185302734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 359.175537109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 359.0973815917969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 359.1005859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 358.7684326171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 359.0177917480469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 358.9178161621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 358.83050537109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 358.8690185546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 358.7251892089844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 358.90960693359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 359.0541076660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 358.8963317871094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 358.8588562011719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 358.71051025390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 358.933349609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 359.02581787109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 358.99615478515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 358.7562561035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 359.01446533203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 359.08453369140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 358.85858154296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 359.0325927734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 359.05181884765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 359.01885986328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 359.0065002441406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 358.9471435546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 359.0902099609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 358.8128967285156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 358.7159423828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 358.8675842285156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.9646301269531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 358.93310546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 359.0887756347656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 358.7997741699219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 358.79296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 358.9608154296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 359.0520324707031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 359.0006103515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 358.861083984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.899169921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 359.11810302734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 358.70751953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 358.90838623046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 358.9412536621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 358.9291076660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 359.03155517578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 358.7879333496094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 358.873046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 359.0492858886719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 359.1585388183594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 358.6076965332031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 358.858154296875
############ Running episode number: 931  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.98394775390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 358.8222961425781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 358.96258544921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 358.9069519042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 358.92388916015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 358.9503479003906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 358.8714294433594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 359.0000915527344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 359.12701416015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 359.3265686035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 358.9251708984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 358.9990234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 358.8372802734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 359.012939453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 359.21282958984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 359.042724609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 358.9726257324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 359.0072937011719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 358.77276611328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 358.7787170410156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 359.02349853515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 358.8758544921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 358.73052978515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 358.98211669921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 358.9627380371094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 359.1624755859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 358.784912109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 358.9580078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 359.0699768066406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 359.1721496582031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 358.8939514160156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 358.78607177734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 359.0608215332031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 358.9776916503906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 359.0653381347656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 359.2392883300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 358.90643310546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.8042907714844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 358.9298400878906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 358.9102478027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 359.0345764160156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 358.7748107910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 358.974853515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 359.03350830078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 359.0695495605469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 358.8772888183594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 359.0938720703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 359.09002685546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 358.93035888671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 358.95660400390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 359.0221862792969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 359.039306640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 358.866943359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 358.9406433105469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 358.88494873046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 359.2411804199219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 358.9636535644531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 358.7290954589844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 358.9510498046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 358.9947509765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 359.0052490234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 358.7536926269531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 359.057861328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 358.79730224609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 358.953369140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 358.951171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 358.7129821777344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 359.0724792480469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 358.94952392578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 359.02947998046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 358.78289794921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 358.94036865234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 358.84405517578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 358.9744873046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 358.9731140136719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 358.8293762207031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 358.87957763671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 358.8710632324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.9474792480469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 358.9931335449219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 358.93695068359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 358.8313293457031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 358.8841552734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 358.88671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 358.76190185546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 359.1990051269531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 358.7228088378906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.74627685546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 359.0379638671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 359.1308898925781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 359.1188049316406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 359.0178527832031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 358.9908752441406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 358.986572265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 358.96319580078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 358.9304504394531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 359.185302734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 359.2032775878906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 358.955322265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 359.0091247558594
############ Running episode number: 932  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.7835388183594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 358.8433532714844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 358.9997863769531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 358.9725341796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 358.7244567871094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 358.98876953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 358.9595947265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 358.7927551269531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 358.9834289550781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 359.0325012207031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 359.13714599609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 359.04669189453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 359.0849914550781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.81964111328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 359.11907958984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 358.82208251953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 359.296630859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 358.657470703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 358.8196105957031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 358.975341796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 359.1641540527344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 358.7792053222656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 359.0053405761719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 358.8531494140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 359.0403137207031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 359.2974548339844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 359.0731506347656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 359.0258483886719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 358.9840393066406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 359.1663513183594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 358.9514465332031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 358.8083801269531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 358.76947021484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 358.94940185546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 359.1358642578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 359.07073974609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 358.8807678222656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.9316101074219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 358.87969970703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 358.9991760253906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 358.8808898925781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 358.86474609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 359.0726318359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 359.0032653808594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 359.03717041015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 359.05047607421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 358.75579833984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 359.0873107910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 358.993408203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 358.9604187011719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 359.1087951660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 358.6958923339844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 359.1262512207031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 358.7488098144531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 358.98333740234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 358.9606628417969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 358.99835205078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 359.2767333984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 359.14532470703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 358.96600341796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 358.8521728515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 358.8097229003906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 359.0670471191406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 358.9444274902344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 359.0705871582031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 359.02691650390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 358.9985656738281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 358.83953857421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 358.6817626953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 358.8135070800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 358.8651428222656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 359.0583801269531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 358.81683349609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 358.9548034667969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 358.84637451171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 359.0790710449219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 358.9488830566406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 358.9432067871094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 359.000244140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 358.98114013671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 358.8257141113281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 358.8206787109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 359.0356750488281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 358.9070739746094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 358.7155456542969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 359.0609436035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 359.0841064453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.8431091308594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.96246337890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 358.8487854003906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 358.92633056640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 358.5538330078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 359.0205993652344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 358.8863525390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 359.0206298828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 359.1181335449219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 358.90472412109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 358.89654541015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 358.7789001464844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 358.8010559082031
############ Running episode number: 933  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.9679870605469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 358.7638854980469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 359.1685791015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 358.9586486816406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 359.2218933105469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 358.859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 358.92095947265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 359.0171203613281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 359.16485595703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 358.85284423828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 358.8768005371094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 359.009033203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 358.97821044921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.9640197753906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 358.978271484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 359.0467529296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 359.0641174316406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 359.0086669921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 359.135009765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 358.93682861328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 358.98382568359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 358.9719543457031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 359.0279541015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 358.8726806640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 359.0372009277344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 359.0008850097656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 359.02056884765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 359.0788879394531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 358.8879699707031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 358.89788818359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 358.9968566894531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 358.9556884765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 358.9063720703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 359.0354919433594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 358.96942138671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 358.97833251953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 358.7381286621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 359.0033874511719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 358.9857482910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 358.9828796386719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 359.2857666015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 358.8730773925781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 358.87152099609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 359.1203918457031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 358.81072998046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 358.9015808105469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 358.7911682128906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 359.00201416015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 359.1189270019531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 359.041015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 359.0611877441406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 359.0410461425781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 358.7044982910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 358.8970031738281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 358.7788391113281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 359.1400146484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 358.84674072265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 359.09051513671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 359.08062744140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 358.91119384765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 358.72625732421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 359.11297607421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 358.8035888671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 358.89892578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 359.0145568847656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 359.06005859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 358.8817138671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 358.8236389160156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 359.0246887207031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 358.927734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 359.0556945800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 358.8499450683594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 358.9451599121094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 359.08148193359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 358.9134521484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 358.9215087890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 358.7781982421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 359.0500183105469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 359.1198425292969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 358.9365234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 359.1057434082031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 358.7769775390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 358.7347717285156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 358.6802062988281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 358.8918151855469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.926513671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 359.1969909667969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.8486022949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 359.0307922363281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 358.9862976074219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 358.99176025390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 358.7315979003906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 358.931396484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 358.97998046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 358.861572265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 358.9630432128906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 358.9250793457031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 358.9351501464844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 358.93133544921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 359.10394287109375
############ Running episode number: 934  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 359.0661315917969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 358.7205810546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 359.05340576171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 358.8635559082031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 359.0328674316406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 358.92291259765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 359.1007995605469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 358.9298400878906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 358.94482421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 358.996826171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 358.9886474609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 358.8796081542969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 358.78167724609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.838623046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 359.11041259765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 358.8267822265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 358.7997741699219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 359.03460693359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 358.8778076171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 358.87384033203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 358.8598327636719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 359.0519714355469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 358.83282470703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 358.9808044433594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 358.8794250488281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 358.92706298828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 358.9471435546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 359.1805725097656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 359.0073547363281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 358.759521484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 359.001953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 358.7854919433594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 358.84527587890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 359.0871887207031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 358.95361328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 358.864501953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 358.9534912109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.971435546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 358.9001770019531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 358.9526062011719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 358.9686584472656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 358.9762878417969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 359.12884521484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 358.8782043457031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 359.19732666015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 359.1081848144531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 358.7681884765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 358.8218688964844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 358.8736572265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 359.0548400878906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 359.1111755371094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 358.68896484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 358.8965759277344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 358.9444885253906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 358.9580078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 359.07867431640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 358.9459533691406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 358.9383544921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 359.1719665527344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 359.0305480957031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 358.7914123535156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 358.9195556640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 358.7832336425781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 359.102294921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 359.14697265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 359.0333251953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 358.8313293457031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 359.0149841308594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 359.05682373046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 359.0299987792969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 358.83599853515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 358.8250427246094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 358.93096923828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 358.8532409667969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 358.96258544921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 359.0933837890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 359.065673828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 358.89886474609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 359.0537109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 358.91497802734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 359.0216979980469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 358.9390563964844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 358.8045959472656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 358.837646484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 358.9378967285156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.95489501953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 358.8163146972656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 359.05877685546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 359.20928955078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 358.8526611328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 359.04425048828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 359.2080078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 358.83721923828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 358.81109619140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 359.0942077636719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 359.109130859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 358.70068359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 358.7001647949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 358.963134765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 358.96844482421875
############ Running episode number: 935  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.9970397949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 358.89117431640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 358.9881591796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 358.9101867675781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 359.0538024902344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 359.1422119140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 359.0644836425781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 359.0321044921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 359.07208251953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 359.1505126953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 358.6624450683594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 358.966064453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 359.05621337890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.87701416015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 358.8916320800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 358.6640319824219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 358.8240051269531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 358.785888671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 358.7750549316406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 359.0207214355469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 358.9280700683594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 358.9964904785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 359.0171813964844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 359.18841552734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 359.1777038574219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 358.9084777832031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 358.9361877441406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 359.03472900390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 358.80902099609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 358.94488525390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 359.0145263671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 359.2459411621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 358.791015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 358.9559326171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 358.7403869628906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 359.03082275390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 358.9441833496094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.9094543457031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 359.03948974609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 358.6067810058594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 358.8183898925781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 358.8639221191406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 358.8042907714844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 358.9128723144531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 358.9661865234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 358.9576416015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 358.9985046386719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 358.9918518066406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 358.9439697265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 358.88623046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 359.1298828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 359.08306884765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 358.9214172363281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 358.872314453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 358.9588928222656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 359.05023193359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 359.0893249511719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 358.9728698730469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 359.1954650878906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 358.68896484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 359.0012512207031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 359.091796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 358.8923034667969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 358.8265075683594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 358.6942138671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 358.85302734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 358.845703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 358.8686828613281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 359.116943359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 358.872802734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 359.13677978515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 359.0792541503906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 358.9144287109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 359.0631408691406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 358.6673889160156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 359.0203552246094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 358.6593322753906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 359.1615295410156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.8197937011719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 359.042236328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 359.0494689941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 358.7599182128906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 359.10748291015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 358.85693359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 359.0147399902344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.9317321777344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 358.927001953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.8999328613281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 359.08197021484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 359.0030212402344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 359.1745910644531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 359.2140808105469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 358.9501953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 359.06365966796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 359.0130615234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 358.92034912109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 358.8782043457031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 359.0369873046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 358.8677978515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 358.9263610839844
############ Running episode number: 936  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 359.03021240234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 359.0930480957031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 358.9779052734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 359.19561767578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 358.7928466796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 358.8612060546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 359.01641845703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 358.97271728515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 358.9889221191406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 359.0611877441406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 358.9833068847656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 359.0897216796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 358.9535827636719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.84075927734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 358.9534606933594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 359.1022644042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 359.0369567871094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 359.0837097167969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 359.0811767578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 358.945068359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 359.16143798828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 358.72637939453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 359.04150390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 358.8636169433594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 358.9667663574219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 358.8772277832031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 358.8488464355469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 358.6972961425781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 359.05426025390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 358.997314453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 358.907958984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 358.9168701171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 358.85137939453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 359.2311706542969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 358.9895935058594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 359.1040344238281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 359.1767272949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.8866271972656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 359.0955505371094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 359.0487060546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 359.0578918457031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 358.9479064941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 358.78912353515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 359.1115417480469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 358.9766540527344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 358.9876403808594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 358.8330993652344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 359.0132751464844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 359.02703857421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 358.7831726074219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 359.01611328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 358.9158630371094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 358.93682861328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 358.8458557128906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 358.9791564941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 358.9663391113281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 358.9649658203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 359.0514221191406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 358.9506530761719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 359.01580810546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 358.9336853027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 358.6708068847656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 358.9867248535156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 358.8070068359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 359.0003967285156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 358.93780517578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 358.79754638671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 359.1005859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 359.0716857910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 359.1705017089844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 358.9886779785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 359.0850830078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 358.91717529296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 358.98004150390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 358.9151611328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 358.7641296386719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 358.9557189941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 359.07647705078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.7543640136719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 358.76116943359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 358.8655090332031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 358.9437561035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 358.8913879394531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 358.58740234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 358.93017578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 359.0776672363281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 358.6740417480469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.7621765136719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.8608703613281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 359.1878662109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 358.777587890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 359.05511474609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 358.80084228515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 359.0915222167969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 358.8512268066406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 359.00213623046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 358.8974914550781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 358.8515319824219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 359.2744445800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 358.8688659667969
############ Running episode number: 937  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.9070739746094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 358.9186706542969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 359.0642395019531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 358.7343444824219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 359.0589294433594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 359.02716064453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 359.108642578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 358.81011962890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 358.87420654296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 358.8531188964844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 358.99188232421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 359.0082092285156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 358.92388916015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.9404602050781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 358.9587707519531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 358.9586486816406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 358.9289855957031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 358.9473571777344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 359.0387268066406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 359.0490417480469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 358.9474792480469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 359.06317138671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 358.95684814453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 358.7771301269531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 358.8319396972656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 358.91278076171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 358.8446960449219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 358.90936279296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 359.204345703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 358.9494323730469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 358.97314453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 359.0902404785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 359.1790771484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 359.15814208984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 358.9094543457031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 358.783447265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 358.8424072265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.74993896484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 359.0235595703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 358.95086669921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 358.9812316894531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 359.0486145019531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 358.9219055175781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 358.88671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 358.639404296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 358.9656982421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 359.0215759277344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 359.21728515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 358.9644470214844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 358.6560363769531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 359.0228271484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 358.8726806640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 359.0626220703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 358.8724365234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 359.09112548828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 358.9466857910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 359.0245666503906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 359.04351806640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 358.9023742675781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 359.04913330078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 358.7894592285156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 358.7950134277344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 358.96331787109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 359.1125183105469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 358.9017028808594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 358.9981689453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 358.85443115234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 358.8285827636719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 359.06939697265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 358.87506103515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 359.0338134765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 359.0096740722656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 358.8360900878906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 358.9427795410156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 359.1036376953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 358.9829406738281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 359.24114990234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 358.9970703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 359.11334228515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 359.0267028808594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 358.7871398925781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 358.97613525390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 358.9208679199219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 358.99542236328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 358.75262451171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 359.146728515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 358.9418029785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 359.0986633300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.8114929199219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 359.1619873046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 359.0296936035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 358.9649353027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 359.0505676269531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 358.9474792480469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 358.8497314453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 358.9682312011719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 359.0513916015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 358.8427429199219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 359.1220397949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 358.88079833984375
############ Running episode number: 938  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 359.15972900390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 358.9159240722656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 359.197998046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 358.94683837890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 359.0093078613281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 359.1732482910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 359.0915832519531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 358.8815612792969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 358.8866271972656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 358.8500061035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 358.81134033203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 358.9066467285156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 359.0548400878906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 359.2717590332031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 359.01800537109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 358.9736328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 359.10601806640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 358.96124267578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 359.06878662109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 358.9139099121094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 358.9790954589844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 359.14202880859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 358.9298400878906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 359.0821228027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 359.17041015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 358.92193603515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 358.8807067871094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 359.0316162109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 358.9039611816406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 358.7786865234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 358.6927185058594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 359.10260009765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 359.0238037109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 359.05426025390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 358.8785095214844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 358.9234313964844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 359.1438903808594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 359.08258056640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 358.96185302734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 359.04315185546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 358.8700866699219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 359.04150390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 358.939453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 358.8166198730469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 358.98297119140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 359.06671142578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 358.7718811035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 358.8475341796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 358.927734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 358.91119384765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 359.1851806640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 358.92291259765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 358.90771484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 359.08831787109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 359.1286926269531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 358.8603210449219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 358.7409362792969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 359.0560302734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 358.96258544921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 359.0768127441406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 358.81134033203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 358.9581604003906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 359.1941833496094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 358.83453369140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 359.0533752441406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 358.79962158203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 359.0666809082031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 358.9445495605469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 358.8833312988281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 359.197265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 359.070068359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 358.770751953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 359.1070251464844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 358.68048095703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 359.0246276855469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 358.9726257324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 359.0087890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 359.076904296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.8845520019531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 358.929931640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 358.8087463378906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 358.9910888671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 359.099365234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 359.12054443359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 358.7441711425781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.7420349121094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 358.99078369140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.8995666503906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.9279479980469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 358.787353515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 358.9610900878906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 359.0735168457031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 359.0967102050781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 358.83062744140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 359.1340637207031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 358.8553161621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 359.1606140136719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 358.8800048828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 358.77386474609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 358.86578369140625
############ Running episode number: 939  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.9501647949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 358.8398132324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 359.032470703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 358.9256591796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 358.88922119140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 358.97796630859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 359.0831298828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 359.3203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 358.8702697753906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 358.8544616699219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 359.02154541015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 358.9186096191406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 358.86334228515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.7957458496094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 358.9123840332031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 359.0009460449219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 358.8042907714844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 358.8158874511719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 359.1221618652344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 359.1082763671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 359.0378112792969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 358.98284912109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 359.1615905761719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 359.08697509765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 358.88665771484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 358.9947204589844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 359.01934814453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 359.1178283691406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 359.10809326171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 358.8704833984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 359.0940246582031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 358.8724365234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 358.6885070800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 358.87298583984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 358.8485412597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 359.1982116699219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 359.2339172363281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.98040771484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 358.841064453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 358.93975830078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 359.16229248046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 359.05487060546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 358.98785400390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 358.754150390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 358.8626708984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 359.0596618652344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 359.04150390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 359.02056884765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 358.9751281738281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 359.0642395019531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 359.0505065917969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 358.8836669921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 358.7119445800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 358.91888427734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 358.76812744140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 359.0699462890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 358.93341064453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 358.8384704589844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 358.78753662109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 358.9849548339844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 358.9892883300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 359.0558776855469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 359.0548400878906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 358.8215026855469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 359.06109619140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 359.0688781738281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 359.0431823730469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 359.0131530761719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 359.0963439941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 359.06787109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 358.9159240722656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 359.1401672363281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 359.1161193847656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 358.99053955078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 359.0595703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 359.0936584472656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 358.87469482421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 358.7135009765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.90069580078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 358.9359436035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 358.7625427246094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 359.1738586425781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 358.937744140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 358.76806640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 359.1121826171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.8465576171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 358.85638427734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.84027099609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.8962097167969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 358.8339538574219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 358.9928283691406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 358.8607482910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 358.89495849609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 358.8511047363281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 359.0569152832031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 358.8230285644531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 359.2163391113281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 359.1120300292969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 358.9424743652344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 358.80987548828125
############ Running episode number: 940  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.8385925292969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 359.0478210449219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 359.00860595703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 358.9338684082031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 359.1649169921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 358.8625183105469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 358.7203674316406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 358.6067810058594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 358.8841552734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 359.0511169433594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 358.9244079589844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 358.6424560546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 358.9430236816406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 359.0926208496094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 358.919921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 358.9097900390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 359.1462097167969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 358.7944030761719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 359.1328430175781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 359.01513671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 358.9580383300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 359.07135009765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 358.9100341796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 359.01568603515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 359.0474548339844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 358.85345458984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 358.9364929199219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 359.059814453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 358.93804931640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 359.0457763671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 358.7982177734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 358.8172607421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 358.9539489746094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 359.00311279296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 358.93890380859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 358.9992980957031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 359.02886962890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 359.0735168457031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 358.9642333984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 358.6971740722656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 358.79339599609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 359.04156494140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 359.04241943359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 358.8207702636719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 358.9950866699219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 359.0594177246094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 359.07818603515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 359.1767578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 359.2340087890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 359.0334777832031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 358.7705078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 359.06805419921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 358.7930603027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 358.8038330078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 359.0242004394531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 358.8923034667969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 359.0321960449219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 359.1745300292969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 358.92840576171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 359.0163879394531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 359.0611267089844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 358.9866638183594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 359.07550048828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 359.14886474609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 359.0492248535156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 359.03387451171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 358.84149169921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 359.0050048828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 358.9437561035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 358.95831298828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 359.1228942871094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 358.94903564453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 359.27008056640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 358.951904296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 358.879638671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 359.0972900390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 358.9154968261719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 359.012939453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.9832763671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 358.8214416503906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 358.8087158203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 358.948486328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 358.8074951171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 358.8995666503906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 359.049560546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.75054931640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 358.9054870605469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.9600830078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 359.0479431152344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 358.9363098144531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 358.8594055175781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 359.2225341796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 359.0269470214844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 358.8841857910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 358.92083740234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 358.8673095703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 358.85986328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 358.6140441894531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 359.1199645996094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 359.1020812988281
############ Running episode number: 941  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.93939208984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 359.1056823730469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 358.825439453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 358.8563232421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 358.9456481933594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 358.8999328613281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 358.8609313964844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 359.0249328613281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 358.7264404296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 358.8192138671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 358.8948974609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 359.0613708496094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 358.8756408691406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 359.07244873046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 359.00048828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 358.9144592285156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 358.9554748535156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 358.90093994140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 358.9869079589844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 358.95513916015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 358.7943420410156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 358.7573547363281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 358.7335510253906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 358.983642578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 359.1750183105469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 358.806884765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 359.04339599609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 359.0904541015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 358.9415588378906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 359.082275390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 358.6463928222656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 358.8372802734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 359.0198974609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 359.0364990234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 359.1441650390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 359.09625244140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 358.9570007324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.9726867675781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 358.94390869140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 358.9516296386719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 358.90087890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 358.9443359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 359.05865478515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 359.2007751464844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 358.87451171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 358.8998107910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 358.99932861328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 359.0428466796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 359.0587463378906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 358.9672546386719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 358.9249267578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 358.9002380371094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 358.82177734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 358.9669494628906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 359.0840759277344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 359.146484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 359.00823974609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 358.7953796386719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 359.1238708496094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 358.9618835449219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 359.1339111328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 359.020263671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 358.8487548828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 359.23248291015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 358.92254638671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 358.9818420410156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 359.1354675292969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 359.181640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 358.9117431640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 358.69232177734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 359.1198425292969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 358.9165344238281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 358.7854919433594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 358.87518310546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 358.7297058105469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 359.11993408203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 358.91094970703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 358.778076171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.896728515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 358.85760498046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 358.971923828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 358.9969787597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 358.8614807128906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 358.9559020996094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 359.0439147949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.9461975097656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 358.7373962402344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 359.14764404296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 359.0611572265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 358.8250732421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 359.0485534667969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 358.9552307128906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 358.9410705566406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 359.0680236816406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 358.8436584472656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 359.0782470703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 358.9320068359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 359.0797424316406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 358.9971008300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 359.0578918457031
############ Running episode number: 942  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.9762268066406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 358.95220947265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 358.8830871582031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 359.1628723144531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 358.9544677734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 358.93756103515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 358.92877197265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 358.767578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 359.16851806640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 359.1156005859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 359.1498107910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 358.9102783203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 358.9662170410156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.8730163574219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 358.70489501953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 358.8506164550781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 358.97894287109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 358.9327087402344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 358.82806396484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 358.71966552734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 358.804931640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 358.9927978515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 358.9039611816406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 358.98876953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 359.0023193359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 358.7634582519531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 359.0650329589844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 358.83587646484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 358.8506164550781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 358.9690856933594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 359.0262451171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 359.2220153808594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 359.3025207519531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 359.09844970703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 359.0972900390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 359.1207580566406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 358.91046142578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.9765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 359.027099609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 359.1572265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 359.2493896484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 358.86383056640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 358.8681640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 358.7725830078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 358.886474609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 358.9612121582031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 359.28961181640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 358.9002990722656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 359.1006774902344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 359.04486083984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 358.96978759765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 358.77862548828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 358.8175964355469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 358.95880126953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 358.87408447265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 358.96453857421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 359.03240966796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 359.09307861328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 358.82537841796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 358.9374084472656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 358.77252197265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 359.1963195800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 358.7442626953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 359.1109619140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 358.9681396484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 359.05609130859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 358.70977783203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 359.07928466796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 358.94464111328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 359.1556091308594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 358.8484802246094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 358.71295166015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 358.97564697265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 359.0048828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 358.9127502441406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 358.8690490722656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 358.98004150390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 359.0738525390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.8907470703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 358.8419189453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 359.0572814941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 359.0383605957031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 359.1568908691406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 358.9222717285156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 358.9873352050781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.7866516113281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 358.7388610839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.9439697265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 359.201171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 358.88623046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 359.0765686035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 358.6834411621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 358.8640441894531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 358.8470153808594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 358.8371887207031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 358.9962158203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 359.05694580078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 358.826171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 358.89813232421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 358.839599609375
############ Running episode number: 943  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 359.0328063964844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 358.8066101074219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 358.85980224609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 358.6900939941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 359.0251159667969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 358.8520812988281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 359.03857421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 358.94500732421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 358.8492126464844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 358.92889404296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 359.0428466796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 359.04254150390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 358.9932861328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.8440856933594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 359.0044250488281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 358.8957214355469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 358.8503723144531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 358.8425598144531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 358.6942443847656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 359.0975036621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 358.93896484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 358.7496032714844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 358.9228210449219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 358.788330078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 359.0303039550781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 359.0165710449219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 358.8375244140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 359.113525390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 358.764892578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 358.76898193359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 359.0169677734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 359.0480041503906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 359.0771179199219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 358.9731140136719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 358.947509765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 358.91119384765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 359.0133361816406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 359.1225280761719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 358.87176513671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 359.02227783203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 359.129150390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 358.8960876464844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 358.68359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 358.90185546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 358.84930419921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 359.00042724609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 358.9652099609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 358.9545593261719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 358.63824462890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 359.080810546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 358.8645324707031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 358.98974609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 358.9320983886719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 359.02093505859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 358.9947814941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 358.92083740234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 358.9223937988281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 358.8975524902344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 359.16845703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 359.0738525390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 359.2522277832031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 358.94970703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 359.0143737792969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 359.05413818359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 358.8512268066406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 359.1534423828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 358.7740783691406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 358.991455078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 359.030517578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 358.8715515136719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 358.7745666503906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 358.9454650878906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 359.003662109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 358.9481506347656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 358.89190673828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 359.04437255859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 359.0849304199219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 358.91461181640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.969970703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 358.8861999511719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 359.173583984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 358.8843688964844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 358.97796630859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 359.0383605957031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 358.9524841308594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 359.07830810546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 359.1333312988281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.6319580078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.9195251464844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 358.9622497558594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 359.06512451171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 358.9256286621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 358.9742126464844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 358.85882568359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 358.97625732421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 358.8465881347656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 358.8930358886719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 359.0137939453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 358.8476867675781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 359.11199951171875
############ Running episode number: 944  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.714599609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 359.0474853515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 358.98016357421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 358.8135070800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 359.012451171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 359.1275329589844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 358.98968505859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 358.79278564453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 358.97930908203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 358.8398132324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 358.7012634277344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 358.892822265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 358.7655029296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.8515319824219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 359.1558532714844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 358.8026428222656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 359.1136474609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 358.6717529296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 359.08123779296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 358.8065490722656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 358.6949768066406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 358.7960205078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 359.0265197753906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 359.0126953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 359.1165466308594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 358.8596496582031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 358.82952880859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 359.1251525878906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 358.9605712890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 359.1022033691406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 358.8138122558594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 358.830322265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 358.9223327636719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 358.9498291015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 358.9577941894531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 358.9903259277344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 358.7015075683594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 359.0949401855469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 358.7625427246094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 359.04180908203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 358.833251953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 358.88037109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 358.9255676269531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 358.9986877441406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 358.9721374511719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 358.96954345703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 359.0996398925781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 358.9395446777344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 359.0695495605469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 358.9498596191406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 359.0101013183594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 358.5884704589844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 358.8702697753906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 358.7494201660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 359.0906982421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 358.8573303222656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 358.83837890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 358.85076904296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 358.8330993652344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 359.0557861328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 358.9536437988281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 359.028564453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 358.85723876953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 359.1977844238281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 358.95086669921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 358.94647216796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 359.12493896484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 358.9674987792969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 359.0911865234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 358.932861328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 358.91552734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 359.00372314453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 359.1678771972656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 359.154541015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 359.1714782714844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 359.0816345214844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 359.1585388183594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 359.1168518066406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 359.0846862792969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 359.0401306152344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 359.0721740722656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 358.9049987792969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 358.9120178222656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 358.8085632324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 358.94879150390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.9023132324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 359.0428771972656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 359.112060546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.90203857421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 358.9414978027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 358.96856689453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 358.9703063964844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 358.845947265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 359.0050048828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 358.8318176269531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 359.1223449707031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 358.75054931640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 359.0339660644531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 358.99005126953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 358.8639221191406
############ Running episode number: 945  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.9786071777344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 359.1404113769531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 358.9069519042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 359.0081481933594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 358.8744201660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 358.9763488769531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 358.9981689453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 358.8145751953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 359.034912109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 359.1430358886719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 358.9072570800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 359.02532958984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 359.10540771484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 359.0566711425781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 358.8119812011719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 358.8423156738281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 358.8822326660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 358.9820251464844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 358.8951416015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 358.86590576171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 359.132568359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 358.9787902832031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 358.6744384765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 358.990478515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 359.1169738769531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 359.02801513671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 358.8261413574219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 359.00250244140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 358.9461669921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 359.2113952636719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 358.9842224121094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 358.99346923828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 358.90545654296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 358.8738708496094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 358.6786804199219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 359.0890808105469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 358.8840026855469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 359.0799560546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 359.16259765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 359.0184020996094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 359.0391845703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 358.9422607421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 359.1182861328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 359.0987243652344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 359.0520324707031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 358.8536682128906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 359.0975646972656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 359.0973815917969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 358.8705139160156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 358.9747619628906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 358.8691101074219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 358.94488525390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 359.07269287109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 359.1545104980469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 359.2803649902344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 359.13812255859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 359.0328063964844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 358.87835693359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 358.9685974121094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 358.95733642578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 358.7752380371094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 358.97998046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 359.04193115234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 359.1825256347656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 358.93853759765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 358.9823303222656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 358.8342590332031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 359.1080017089844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 359.0302429199219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 358.9344787597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 359.2435302734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 358.9022521972656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 359.10186767578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 359.1294860839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 358.9503173828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 358.87286376953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 358.9194030761719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 358.955078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 359.0417785644531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 358.7502746582031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 358.8924560546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 358.65283203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 359.08489990234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 358.9048156738281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 359.0166015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.6376037597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 359.142333984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 359.0013427734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.8885192871094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 359.0861511230469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 358.9681396484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 358.9500732421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 358.8548278808594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 359.0419006347656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 358.82818603515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 358.958251953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 358.8726501464844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 359.03521728515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 358.96783447265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 358.9414367675781
############ Running episode number: 946  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.76239013671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 358.76043701171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 358.8639831542969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 358.9968566894531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 358.9078369140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 358.9087219238281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 358.8743896484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 359.0435485839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 359.0029296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 358.8808288574219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 358.89471435546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 358.9366149902344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 359.0102233886719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.9615173339844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 359.0517883300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 358.8773498535156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 359.02471923828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 358.82415771484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 358.9654846191406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 359.08514404296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 359.0599670410156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 359.16314697265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 358.8397521972656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 358.8892517089844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 358.88330078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 358.9665222167969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 359.0610656738281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 359.1054382324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 358.6693115234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 358.9908142089844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 359.09442138671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 358.898193359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 358.95111083984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 358.7569580078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 358.7639465332031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 359.0696716308594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 358.9446105957031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.8403015136719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 359.12353515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 358.8937072753906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 358.964111328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 359.0289611816406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 359.02325439453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 358.8504943847656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 358.97918701171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 358.9079284667969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 359.09130859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 358.95404052734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 358.90313720703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 358.8316955566406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 359.2196044921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 358.91741943359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 359.14019775390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 358.86767578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 358.91131591796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 359.0491027832031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 358.90447998046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 358.7884826660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 359.1184387207031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 359.18524169921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 358.9607849121094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 359.01641845703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 358.97467041015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 358.9161682128906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 359.04620361328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 358.8717041015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 359.0408630371094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 358.84124755859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 359.0751953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 359.06719970703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 358.9565124511719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 358.86187744140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 358.9046325683594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 358.682861328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 359.3525390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 359.13580322265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 359.1308898925781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 358.82244873046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.6651916503906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 358.9498291015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 359.0272216796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 359.03131103515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 358.803955078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 358.84539794921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 358.9156188964844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.9689636230469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 359.1935119628906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 359.0877380371094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.7239990234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 358.75555419921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 359.1192321777344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 358.7987060546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 359.1648864746094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 358.80645751953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 358.7708740234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 358.99627685546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 358.6124267578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 358.9317626953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 358.8382263183594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 359.0042724609375
############ Running episode number: 947  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.93463134765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 359.116455078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 358.97723388671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 358.71661376953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 359.11761474609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 358.8451232910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 358.8412170410156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 358.8752136230469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 358.8979187011719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 358.9654235839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 359.0426025390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 359.03662109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 358.952392578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.7865905761719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 359.1078796386719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 358.85052490234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 358.9546203613281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 358.8894958496094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 359.0672607421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 359.10223388671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 358.8050231933594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 358.9264221191406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 358.9974365234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 358.9937744140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 359.019287109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 358.781982421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 358.8481750488281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 359.07611083984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 358.94281005859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 359.0336608886719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 359.05865478515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 359.0194091796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 358.80364990234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 359.07177734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 359.08251953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 358.8123779296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 358.99322509765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 359.07891845703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 359.0306701660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 358.98944091796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 358.7735595703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 359.0473937988281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 358.9648742675781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 358.9560546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 359.1127624511719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 358.8097839355469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 358.9067687988281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 358.9327697753906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 358.8462219238281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 358.9346008300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 358.8790588378906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 358.89697265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 358.7131652832031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 358.733154296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 358.9964294433594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 358.88336181640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 358.92547607421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 358.84222412109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 359.07965087890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 359.0
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 359.0634765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 358.87567138671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 359.04217529296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 358.968994140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 358.9317321777344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 359.15435791015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 359.11602783203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 358.82391357421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 358.95428466796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 358.89434814453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 359.1343688964844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 358.7657470703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 358.9315490722656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 358.983154296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 358.85540771484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 358.8658142089844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 358.97222900390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 359.15802001953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.8585510253906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 358.9723815917969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 358.9706115722656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 358.9706726074219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 358.9092102050781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 359.1215515136719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 358.9577331542969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.8409423828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 358.724609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.8140869140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.9465026855469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 358.92425537109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 358.90545654296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 358.9980773925781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 358.84332275390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 358.9960021972656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 359.04473876953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 358.9248962402344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 358.9670715332031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 358.9243469238281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 359.0943603515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 358.9764709472656
############ Running episode number: 948  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.86163330078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 359.013671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 358.9350891113281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 358.91741943359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 359.0325622558594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 359.0733947753906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 359.23602294921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 358.91717529296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 358.9179382324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 358.9308166503906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 359.2740783691406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 358.9541320800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 359.184326171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.9730224609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 359.09674072265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 358.93670654296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 358.7611999511719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 358.9421691894531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 358.8508605957031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 359.0494689941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 359.05010986328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 358.712646484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 359.0152893066406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 359.113525390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 358.9134521484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 359.03814697265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 358.94659423828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 359.03192138671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 358.6871337890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 359.0291748046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 358.9967041015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 359.05926513671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 358.9681701660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 359.0694885253906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 359.10858154296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 359.029541015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 358.7518615722656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.939208984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 358.768798828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 358.9316711425781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 359.0575256347656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 358.8291015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 359.0000915527344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 358.7692565917969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 358.9341735839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 358.866455078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 358.95513916015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 358.80865478515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 358.8731689453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 358.8733825683594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 358.88995361328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 358.728515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 358.95867919921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 358.8663024902344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 358.6961975097656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 359.0846862792969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 358.8349609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 359.1217956542969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 358.7618713378906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 359.0523376464844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 358.88916015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 359.1343688964844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 358.9662170410156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 358.744384765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 358.78009033203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 358.96209716796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 358.9543762207031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 358.95745849609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 359.2283630371094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 359.02923583984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 358.95172119140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 359.183837890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 358.7626953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 359.0812072753906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 359.0048828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 359.0699462890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 359.06494140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 358.8763732910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.9814758300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 358.9263610839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 359.1071472167969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 358.8105163574219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 358.8356018066406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 358.8247985839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 358.7939147949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.87957763671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 358.7608947753906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.8651123046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 359.0045166015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 359.0196838378906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 358.8938903808594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 358.9424743652344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 359.1101379394531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 359.0958251953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 358.8753967285156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 358.90118408203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 358.7879333496094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 358.94024658203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 358.94158935546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 359.0508117675781
############ Running episode number: 949  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.9163818359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 358.9913330078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 358.64990234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 358.9185485839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 359.1735534667969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 358.6982727050781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 358.6478576660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 359.1391906738281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 359.06378173828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 359.0242004394531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 359.05126953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 359.0522766113281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 358.9634704589844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.87750244140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 358.7942199707031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 359.1552734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 358.98944091796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 358.990234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 358.9199523925781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 358.98236083984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 358.83160400390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 359.041015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 358.7677307128906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 359.00103759765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 358.8117980957031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 358.920166015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 359.16943359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 358.8403015136719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 358.84478759765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 359.0843505859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 359.1317138671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 358.8481750488281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 359.0952453613281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 359.06488037109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 358.7451171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 358.9314880371094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 358.9733581542969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 359.1665954589844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 358.98699951171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 358.8004150390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 358.9510803222656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 358.7382507324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 358.96014404296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 358.9720153808594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 358.7696838378906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 358.87969970703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 358.9714050292969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 358.930908203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 358.8369445800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 358.8666687011719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 358.87811279296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 359.0079040527344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 358.8949279785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 359.03704833984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 358.90911865234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 359.06475830078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 358.935546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 358.894287109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 359.11474609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 359.08123779296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 358.88360595703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 358.6186218261719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 358.9342346191406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 359.02008056640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 358.7796936035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 359.035888671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 358.9978332519531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 359.0069885253906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 358.9113464355469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 359.0566711425781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 359.0383605957031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 359.15667724609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 358.7595520019531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 358.83819580078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 358.9255065917969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 358.95257568359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 359.0111389160156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 358.93792724609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.9180603027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 359.0767822265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 358.9624938964844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 358.88226318359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 358.94268798828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 359.0041198730469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 359.19976806640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 359.0070495605469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 359.0964660644531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.8626708984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.779541015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 358.88665771484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 358.7383117675781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 358.8025817871094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 359.0394287109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 358.8121643066406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 358.883544921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 359.1519470214844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 358.7750549316406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 358.9821472167969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 359.1121826171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 358.8145751953125
############ Running episode number: 950  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.7276611328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 358.6078796386719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 358.90667724609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 359.0494689941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 358.9169921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 358.95074462890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 359.0450134277344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 358.97491455078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 359.0146179199219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 359.15325927734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 359.0738525390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 358.8780517578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 359.0080261230469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.7747802734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 359.1231689453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 359.01983642578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 358.9346923828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 359.000732421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 359.00897216796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 358.9725646972656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 359.08673095703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 359.0671081542969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 359.09130859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 358.9433288574219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 359.068115234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 358.7495422363281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 359.0531311035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 359.0586242675781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 358.83807373046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 359.0736389160156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 359.1394348144531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 359.05181884765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 358.8794250488281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 358.8984069824219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 358.9725036621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 359.2709045410156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 358.7841796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 359.0335998535156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 358.842529296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 358.9322509765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 358.76983642578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 358.8179931640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 358.8744812011719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 359.12774658203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 358.7170104980469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 358.7353515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 358.9286804199219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 358.9981994628906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 358.9411315917969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 358.8766174316406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 359.11480712890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 358.9051208496094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 359.06793212890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 358.94024658203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 359.01934814453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 358.9920959472656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 359.01568603515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 358.86029052734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 358.8786315917969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 358.9006042480469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 358.9236755371094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 358.90338134765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 358.94427490234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 359.103515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 359.05352783203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 359.09356689453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 358.98419189453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 358.9906005859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 359.010498046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 358.843505859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 358.83624267578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 359.0773620605469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 358.92333984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 358.7686462402344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 359.00872802734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 358.7301025390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 358.9817199707031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 358.77850341796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.82623291015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 358.81072998046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 358.9403381347656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 358.91790771484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 359.076904296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 358.84423828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 358.9952392578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.7898254394531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 359.09716796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 359.1202697753906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 359.09625244140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 358.86712646484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 358.864501953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 359.0805358886719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 358.894287109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 358.96649169921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 358.9891357421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 358.9700012207031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 358.85498046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 359.07928466796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 359.1567687988281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 359.05889892578125
############ Running episode number: 951  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.89306640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 359.2143859863281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 359.112548828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 358.98760986328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 359.095703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 358.86541748046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 359.0311279296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 358.9178466796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 359.05218505859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 359.2150573730469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 359.1358337402344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 358.8250732421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 358.8678894042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.9049072265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 358.9168701171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 359.0751953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 358.80499267578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 358.9816589355469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 359.15313720703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 358.94805908203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 358.97857666015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 359.1893310546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 358.96661376953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 358.88873291015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 359.0035705566406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 358.78265380859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 358.95556640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 359.0213623046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 358.9200744628906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 358.8167419433594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 358.956787109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 358.9430847167969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 358.9472351074219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 358.87127685546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 358.9070129394531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 358.7005920410156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 358.88543701171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 359.0218811035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 358.955078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 359.03594970703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 358.9692077636719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 358.63739013671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 358.987548828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 358.9875183105469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 358.82769775390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 358.985595703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 358.9944763183594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 358.9259033203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 358.9103698730469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 358.8858947753906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 359.13134765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 359.2122802734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 358.90997314453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 358.96441650390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 359.17364501953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 358.8569641113281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 358.89447021484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 359.0138854980469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 359.1444396972656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 358.98309326171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 359.0767822265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 358.94842529296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 358.92718505859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 359.052734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 359.043701171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 359.00714111328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 358.9920654296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 359.0718994140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 359.0810241699219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 358.802001953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 358.6379699707031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 358.6406555175781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 358.8353271484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 358.8279724121094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 358.9725036621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 359.0775146484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 358.8957214355469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 358.9513244628906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.845703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 359.09393310546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 359.2386169433594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 358.9137268066406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 358.9378356933594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 358.8330383300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 358.8647155761719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.8877868652344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 359.1490478515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.9291687011719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 359.0043640136719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 359.0887451171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 358.9051513671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 359.0261535644531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 359.01953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 359.2054748535156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 358.9765319824219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 358.7207946777344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 358.8910827636719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 358.7969665527344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 358.9726257324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 359.03155517578125
############ Running episode number: 952  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.9394226074219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 359.0049743652344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 358.6986083984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 358.989013671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 359.0885925292969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 358.91424560546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 358.81341552734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 358.9608154296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 359.0513916015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 359.1172180175781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 358.87457275390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 358.89691162109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 359.1256408691406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.9992980957031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 359.0453186035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 358.87652587890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 359.04656982421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 358.8692932128906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 358.92108154296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 359.12799072265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 358.85003662109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 358.9321594238281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 358.7476806640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 359.0189514160156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 358.797119140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 359.13470458984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 359.0538635253906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 358.9708557128906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 359.1966857910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 358.89654541015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 359.0184631347656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 358.92840576171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 358.95263671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 359.1089782714844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 359.1528015136719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 359.03704833984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 359.14447021484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.8772277832031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 359.006103515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 358.7766418457031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 358.91021728515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 358.83685302734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 358.9280090332031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 358.9261474609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 358.9436340332031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 358.9990234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 358.9413146972656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 359.03082275390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 358.7956848144531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 359.1730651855469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 358.7969055175781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 358.8517150878906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 358.76300048828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 359.0412292480469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 359.0050048828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 358.9831848144531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 359.0087585449219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 359.0472412109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 358.837158203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 358.8058776855469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 359.1756896972656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 359.3204650878906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 358.7742004394531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 358.95928955078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 358.86676025390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 359.09912109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 358.8412170410156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 359.2207946777344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 358.9104309082031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 358.8902893066406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 358.7930603027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 358.8548889160156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 358.9368896484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 359.0291748046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 358.8641662597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 358.9105529785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 358.8444519042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 358.8023681640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.98822021484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 358.9605407714844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 358.9097595214844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 358.8225402832031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 359.0152282714844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 358.9946594238281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 358.94189453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 359.0024719238281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 358.99810791015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.77935791015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 359.1287841796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 358.8553771972656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 358.81964111328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 359.1578063964844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 359.11798095703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 358.8721008300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 358.99652099609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 358.95947265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 358.87200927734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 358.9495544433594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 358.6925048828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 358.91815185546875
############ Running episode number: 953  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 359.0848388671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 359.1111755371094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 359.0591125488281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 359.1726379394531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 358.9250183105469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 359.0445861816406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 359.0915222167969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 359.1202697753906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 359.021484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 359.0892639160156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 358.7042541503906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 358.9410095214844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 358.8625183105469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.88641357421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 359.1174011230469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 359.02239990234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 359.0887451171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 358.9978332519531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 358.9823303222656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 359.0689697265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 358.7664794921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 359.1031188964844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 358.8896179199219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 358.6521911621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 358.8248596191406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 359.0594787597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 358.99664306640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 358.8305358886719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 358.95452880859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 359.1878662109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 359.1062927246094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 358.9465026855469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 358.8779602050781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 358.84130859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 358.8550109863281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 358.9073486328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 359.0417785644531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.97222900390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 358.978515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 358.90582275390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 358.89410400390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 358.8993835449219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 358.9398498535156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 359.0017395019531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 358.7803649902344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 358.8412780761719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 358.8731689453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 359.0579528808594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 358.9412536621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 359.0629577636719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 358.97601318359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 359.1565856933594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 359.2032470703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 359.2272644042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 358.9001770019531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 358.7840270996094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 358.8857116699219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 358.84051513671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 358.7716064453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 358.95941162109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 358.92620849609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 358.80133056640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 359.0360107421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 358.7962646484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 358.8378601074219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 358.9794616699219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 358.798583984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 358.8744812011719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 358.56201171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 359.1347961425781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 358.814697265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 358.8494567871094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 359.1412048339844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 358.8483581542969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 359.1069030761719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 359.0387268066406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 359.12646484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 359.09442138671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.7879333496094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 358.96893310546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 358.9704284667969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 358.8084411621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 359.0078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 359.05316162109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 359.0674133300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.9111328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 358.7959899902344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 359.281982421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.8958435058594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 358.8694152832031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 359.05450439453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 358.9260559082031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 359.0278625488281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 359.1438903808594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 359.0428466796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 358.7169494628906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 359.0907897949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 358.8996276855469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 358.7432861328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 358.9750061035156
############ Running episode number: 954  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 359.00018310546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 359.0285949707031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 358.7146301269531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 358.7813415527344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 358.9881286621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 358.8933410644531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 358.9342041015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 358.6754455566406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 358.9775085449219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 359.02252197265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 358.8470458984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 359.059326171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 358.89996337890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.7867126464844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 358.97979736328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 358.5080261230469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 359.023681640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 359.007568359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 358.93450927734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 358.9339904785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 358.82965087890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 358.8924865722656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 358.8509216308594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 359.05194091796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 359.1896057128906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 359.08221435546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 358.98236083984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 358.9903564453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 358.7301940917969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 359.2806091308594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 359.115966796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 359.1974792480469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 359.0687561035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 358.9387512207031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 358.98162841796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 359.0220947265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 358.853759765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.8934326171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 358.9019775390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 358.8821716308594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 359.107666015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 358.8654479980469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 358.9803771972656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 358.6022033691406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 358.94158935546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 358.8662109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 358.8780212402344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 358.79681396484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 359.11077880859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 358.92242431640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 358.99267578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 359.0343322753906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 358.98046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 358.95550537109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 358.9902648925781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 358.9004821777344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 358.9241638183594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 358.7777404785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 359.03515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 358.9111022949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 358.8772888183594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 358.9882507324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 358.9316101074219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 359.0103759765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 358.8603820800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 359.0019226074219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 359.10546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 359.15130615234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 358.92718505859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 358.9324035644531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 358.93402099609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 359.0611267089844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 359.1758117675781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 358.9786682128906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 359.00592041015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 358.9247741699219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 359.16412353515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 358.856201171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.962890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 358.8790588378906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 358.89923095703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 359.0216979980469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 359.047607421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 358.90948486328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 358.73565673828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 359.0830078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 359.0235595703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 359.2298889160156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.72637939453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 359.0417785644531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 358.66351318359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 358.8124694824219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 358.9717712402344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 359.1303405761719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 359.0370178222656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 358.8907470703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 359.06060791015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 358.81951904296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 358.9570007324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 359.00103759765625
############ Running episode number: 955  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 359.1723937988281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 359.1180419921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 358.9299621582031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 359.0231628417969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 358.9022521972656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 359.0379638671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 358.92572021484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 358.97418212890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 359.1155700683594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 359.0508728027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 359.0490417480469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 359.1300964355469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 358.9618835449219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.78680419921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 358.99224853515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 358.7821350097656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 359.0316467285156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 358.9468688964844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 358.790771484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 358.9201965332031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 359.0404357910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 358.9014587402344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 359.046630859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 358.9490661621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 358.9223327636719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 359.14483642578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 358.9397277832031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 359.0196838378906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 359.0697021484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 358.9718017578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 358.80438232421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 359.069091796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 358.9047546386719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 358.843994140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 358.94183349609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 359.1556396484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 358.970458984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.8866882324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 358.9493103027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 358.7820739746094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 358.89923095703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 358.9165954589844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 359.10546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 358.88909912109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 358.94403076171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 359.01055908203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 359.002197265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 359.0419006347656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 358.9559326171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 358.8817443847656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 358.7529296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 359.2425231933594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 358.8539733886719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 358.79339599609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 359.062744140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 359.1123046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 359.0228271484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 359.0772705078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 359.1157531738281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 359.0731506347656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 358.9622497558594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 358.83245849609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 359.0404357910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 358.91558837890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 358.895263671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 359.06695556640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 358.9415588378906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 359.18829345703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 358.66888427734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 359.06768798828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 358.93145751953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 359.0896301269531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 359.0743408203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 359.02099609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 359.14642333984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 358.9198303222656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 358.83319091796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 359.2842102050781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 359.1484680175781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 359.00250244140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 359.1038818359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 359.01947021484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 358.95587158203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 359.0115966796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 359.0274658203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.75164794921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 359.1943664550781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.84808349609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 359.0405578613281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 359.1190185546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 358.9705810546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 358.7670593261719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 358.99072265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 358.87628173828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 358.9810485839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 359.1986083984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 359.1183776855469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 358.7792053222656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 359.0221252441406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 359.0356140136719
############ Running episode number: 956  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.95782470703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 359.05487060546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 358.9726867675781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 358.71697998046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 359.1775207519531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 358.8302917480469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 358.9654235839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 358.94805908203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 358.8864440917969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 359.0464782714844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 358.8648376464844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 358.938232421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 358.992919921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 359.1604919433594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 358.7470397949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 358.81988525390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 359.1220397949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 358.7040710449219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 358.865478515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 359.0481262207031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 358.89764404296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 358.99871826171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 359.1559753417969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 358.9186096191406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 358.92767333984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 358.7793273925781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 359.0079040527344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 358.8758850097656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 359.280029296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 358.8321838378906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 358.8994445800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 359.1711120605469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 358.8357849121094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 358.8359069824219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 358.9811706542969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 359.080810546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 358.9643249511719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.7983703613281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 359.0220642089844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 359.0862121582031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 359.02545166015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 359.01031494140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 358.75115966796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 358.8656311035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 358.972900390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 359.00921630859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 358.9931335449219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 358.8086242675781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 359.1781005859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 359.2860107421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 359.09954833984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 359.0462646484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 358.8054504394531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 358.7195739746094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 358.9919128417969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 358.9570007324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 358.9029541015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 359.0054931640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 359.0085144042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 359.0036926269531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 359.1622619628906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 359.1639099121094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 359.1221008300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 358.994140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 358.90777587890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 358.9573974609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 358.714599609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 358.90802001953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 359.1527404785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 359.142578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 359.06207275390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 359.02166748046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 358.86279296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 358.9684753417969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 358.9761047363281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 359.00958251953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 358.93634033203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 358.81707763671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.92877197265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 359.0476989746094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 359.00506591796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 359.0688781738281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 358.9958801269531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 358.8818664550781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 358.86431884765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.8326721191406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 359.0036926269531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.9186706542969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.9202575683594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 358.9715576171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 359.0725402832031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 358.91082763671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 359.0838928222656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 359.129638671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 358.78228759765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 358.9996032714844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 359.1256408691406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 359.0406799316406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 358.87860107421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 359.0422058105469
############ Running episode number: 957  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.9342956542969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 358.94915771484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 359.03436279296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 359.0244445800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 359.18157958984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 359.0129089355469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 359.1177062988281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 358.86126708984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 358.8294982910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 358.8792419433594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 359.0442810058594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 358.7478942871094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 358.8878479003906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 359.01580810546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 358.9501647949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 358.9244384765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 358.9613342285156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 358.88873291015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 358.93414306640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 358.7250061035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 359.0585632324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 358.85943603515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 359.1857604980469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 358.8200988769531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 359.0915832519531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 359.02593994140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 358.93359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 358.73779296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 359.07659912109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 358.9883728027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 359.034423828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 358.9559326171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 358.9551086425781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 358.8282775878906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 359.1510314941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 358.9306945800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 358.9497375488281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.89752197265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 359.16741943359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 358.95263671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 358.9906311035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 358.77655029296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 358.8226318359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 359.07269287109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 358.9148254394531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 358.9317626953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 358.8428649902344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 358.990234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 359.1114501953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 358.95184326171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 358.8775939941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 358.7952575683594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 359.1610107421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 358.9415283203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 359.1361083984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 358.9118347167969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 358.9331970214844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 359.2037658691406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 358.9886169433594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 358.87811279296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 358.93841552734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 358.9508972167969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 359.0887756347656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 358.8571472167969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 359.1109619140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 358.9443664550781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 359.2254333496094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 358.6709899902344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 358.8431396484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 359.0589904785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 359.102294921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 358.904052734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 358.9332275390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 358.9762878417969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 359.0489807128906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 358.7959289550781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 359.09832763671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 359.0977478027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 359.0221862792969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 359.0589294433594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 358.96759033203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 358.99456787109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 359.2276916503906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 358.6430358886719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 359.0211181640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.7701110839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 359.0019836425781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 359.12322998046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.82684326171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 359.0981140136719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 358.9248352050781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 358.7957763671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 359.1266174316406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 358.902099609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 358.9494934082031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 358.8165588378906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 359.1120910644531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 358.87408447265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 358.8545227050781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 358.9960021972656
############ Running episode number: 958  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 359.04046630859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 359.03009033203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 358.9189758300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 358.8301696777344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 359.0684509277344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 358.9681091308594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 358.9000549316406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 358.96563720703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 359.04803466796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 358.8065185546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 358.96697998046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 358.9217224121094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 358.97283935546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.9723815917969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 359.0177001953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 359.035888671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 358.7847595214844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 358.84429931640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 358.90765380859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 358.9609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 359.05804443359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 358.9424743652344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 358.86627197265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 358.9100646972656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 358.95306396484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 359.2890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 358.8536071777344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 358.894287109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 359.016357421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 359.0048522949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 359.065185546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 359.0308532714844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 359.0544738769531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 358.9342956542969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 359.0892639160156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 359.0509033203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 358.9286804199219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.8019714355469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 359.1124267578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 359.417236328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 359.03271484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 358.8999938964844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 358.9385986328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 358.8496398925781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 358.9847717285156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 359.0985107421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 359.03265380859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 359.0227355957031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 358.97900390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 358.9223937988281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 358.90118408203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 359.03564453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 358.7539978027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 358.9015197753906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 358.58868408203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 359.0500183105469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 359.0825500488281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 359.04266357421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 358.9655456542969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 358.761962890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 358.9686584472656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 359.1667175292969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 358.8563537597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 358.6623840332031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 358.9580383300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 358.8757019042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 358.8055114746094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 359.1507873535156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 358.895751953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 358.9222412109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 358.74420166015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 359.09844970703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 358.9280090332031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 358.878662109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 358.9855651855469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 358.8860778808594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 358.992431640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 358.9991760253906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 359.1101989746094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 358.75933837890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 358.9418029785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 359.01544189453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 359.14288330078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 358.7779846191406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 359.0939636230469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.832763671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 358.7920227050781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 359.1780700683594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.8218994140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 359.078857421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 359.21282958984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 359.0112609863281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 358.9467468261719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 358.91021728515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 358.93731689453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 358.9474792480469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 359.081787109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 359.0285949707031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 358.92474365234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 358.9780578613281
############ Running episode number: 959  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.9649963378906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 358.941162109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 359.0161437988281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 359.0650634765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 358.8232421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 358.9006042480469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 358.8620910644531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 359.01312255859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 358.93096923828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 359.1346130371094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 358.9224853515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 358.60235595703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 358.9360046386719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.915283203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 359.0006408691406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 358.9107360839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 359.02069091796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 358.7785949707031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 359.07489013671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 359.0675354003906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 359.00250244140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 359.009521484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 358.9267272949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 358.90911865234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 359.0340576171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 359.046142578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 359.0035400390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 358.8541259765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 359.0463562011719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 358.75445556640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 359.0028076171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 358.9700927734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 358.9626159667969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 358.8541259765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 359.004638671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 359.1887512207031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 358.95208740234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 359.041015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 358.68170166015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 359.0008544921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 358.672607421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 358.7061462402344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 359.02496337890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 358.918212890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 359.0346984863281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 358.8431091308594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 359.059326171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 358.7413635253906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 359.0032653808594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 358.9827880859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 359.0144958496094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 359.1230773925781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 358.74908447265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 359.00714111328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 358.68121337890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 358.8380126953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 358.8873596191406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 359.10723876953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 359.07830810546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 358.8496398925781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 358.9656677246094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 358.9386901855469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 358.7930908203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 358.89129638671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 358.7541198730469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 359.0615234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 358.9114074707031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 358.77410888671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 358.78521728515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 359.00933837890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 358.974609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 359.1038818359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 358.9949645996094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 358.59716796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 359.1539611816406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 359.08428955078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 358.99609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 359.0638427734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 359.2050476074219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 359.1357421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 359.3100280761719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 358.8125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 358.8848571777344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 358.8694152832031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 358.9554443359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 359.11029052734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 358.8135681152344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 359.1056213378906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.9340515136719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 359.046630859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 358.91278076171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 358.845458984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 358.9421691894531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 358.92962646484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 358.73516845703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 359.02532958984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 358.8838806152344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 359.075927734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 358.9043273925781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 358.9439392089844
############ Running episode number: 960  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.9670715332031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 359.0865478515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 359.01654052734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 358.8704528808594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 358.9930725097656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 359.05389404296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 358.8846130371094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 358.6900939941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 358.91943359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 359.0119323730469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 359.079345703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 359.08624267578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 359.0176086425781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.9776611328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 358.64581298828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 358.8179016113281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 358.908203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 358.8244934082031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 358.8474426269531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 358.8642883300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 358.7200927734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 358.9560241699219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 358.9012451171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 359.0224609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 359.0674133300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 359.01800537109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 358.96820068359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 358.94403076171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 358.7964172363281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 358.7491760253906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 359.0274963378906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 358.7042541503906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 358.8798522949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 359.0205993652344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 358.9151306152344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 359.01824951171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 358.82257080078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.9708251953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 358.93060302734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 358.89569091796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 358.90118408203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 358.81884765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 358.8586730957031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 359.03369140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 358.8053894042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 359.11932373046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 359.1202697753906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 359.1378173828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 358.6544494628906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 358.86749267578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 358.9956359863281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 358.8643493652344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 358.69793701171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 359.099365234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 359.12786865234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 359.1708984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 358.6117248535156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 358.7894287109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 359.1089782714844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 358.7966613769531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 359.0063171386719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 358.93536376953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 358.73486328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 359.10296630859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 358.816650390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 358.94012451171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 358.8309326171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 358.8818359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 358.91290283203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 358.9325256347656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 359.04107666015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 359.1321105957031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 358.8973388671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 359.0675964355469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 359.03460693359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 359.04608154296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 358.9734802246094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 359.0674743652344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 359.06011962890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 358.9590148925781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 359.09063720703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 358.8851318359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 358.96527099609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 358.989990234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 358.9865417480469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.79217529296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 358.9958801269531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 359.1685791015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.89276123046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 359.0644836425781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 358.9327087402344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 359.0791320800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 358.76220703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 359.1483154296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 358.9211120605469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 358.9100646972656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 359.2209167480469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 358.8433837890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 358.9869079589844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 359.0045166015625
############ Running episode number: 961  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.8678894042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 359.0909118652344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 358.9781188964844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 358.8500061035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 359.034912109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 359.08770751953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 359.1485595703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 358.8422546386719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 358.7987976074219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 358.8959655761719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 359.20550537109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 358.7889709472656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 358.8894958496094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.8669738769531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 358.9291687011719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 358.8426818847656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 358.9300231933594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 359.0507507324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 359.0658264160156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 358.8355712890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 358.88861083984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 359.08050537109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 359.010986328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 358.9327392578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 358.991455078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 358.9620666503906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 358.860107421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 358.8968505859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 358.9623107910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 358.949951171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 358.9163513183594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 358.8819885253906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 359.19732666015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 359.0060119628906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 358.9549255371094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 358.89630126953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 359.1351623535156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.7330017089844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 359.1888732910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 358.72119140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 359.0526123046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 359.05340576171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 358.8338928222656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 359.11236572265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 358.7772521972656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 359.0404968261719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 359.0917053222656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 359.00439453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 358.8862609863281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 359.1075744628906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 359.0480651855469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 359.0230407714844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 358.99029541015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 358.8006591796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 358.88079833984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 358.88006591796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 358.9928283691406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 358.8249816894531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 358.9229431152344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 359.0908203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 358.8494873046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 359.0390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 358.88568115234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 358.7343444824219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 359.19903564453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 358.8529052734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 358.7857360839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 359.06201171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 358.9425048828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 358.8708801269531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 359.1235046386719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 358.9555358886719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 358.88909912109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 358.88507080078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 358.91143798828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 358.92401123046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 359.0061950683594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 358.8041687011719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.7855224609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 358.9145812988281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 358.8382873535156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 358.9800109863281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 358.8138122558594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 358.9630432128906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 359.0061340332031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.8766174316406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 358.8881530761719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 359.0410461425781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.8897705078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 359.02960205078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 359.0936584472656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 358.742431640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 358.80950927734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 359.07281494140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 358.90704345703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 359.0752258300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 359.02069091796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 359.0016174316406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 359.0810241699219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 358.7876281738281
############ Running episode number: 962  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 359.0087585449219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 358.8218078613281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 358.841064453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 359.0150451660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 359.0329895019531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 358.8515319824219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 358.9185485839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 358.90625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 358.82989501953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 358.7320251464844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 358.8669738769531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 358.9159240722656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 358.9461669921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.8981628417969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 358.8287048339844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 358.8699035644531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 358.7403259277344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 358.78167724609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 359.004150390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 358.74346923828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 358.934814453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 358.9949645996094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 358.90057373046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 358.8125305175781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 359.0400390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 358.9299621582031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 359.14959716796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 358.9141845703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 359.011962890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 359.0088806152344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 358.9729309082031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 359.07568359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 358.8758544921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 359.0821228027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 358.8978576660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 358.8621520996094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 359.0018310546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.89337158203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 359.0258483886719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 359.02557373046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 358.8149719238281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 359.2397155761719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 358.9257507324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 359.1653747558594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 358.98980712890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 359.02862548828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 359.0279846191406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 359.106689453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 359.0284423828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 359.08624267578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 359.1114501953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 358.7491455078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 358.9648132324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 358.81622314453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 358.8915100097656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 359.12933349609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 358.9920654296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 358.90228271484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 359.3087158203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 358.95220947265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 359.0496520996094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 359.1053161621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 359.17108154296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 358.8487243652344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 359.1751708984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 358.8815002441406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 359.120361328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 359.1426696777344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 359.0615234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 358.9820556640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 358.9755859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 358.9366455078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 358.9950256347656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 359.0284729003906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 359.1912536621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 359.17230224609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 358.98675537109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 358.9507751464844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.8319091796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 359.02178955078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 359.0503845214844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 358.958251953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 358.8649597167969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 359.0907897949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 358.92205810546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 359.02960205078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 358.9989013671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.900634765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.9314880371094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 358.97906494140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 358.8049621582031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 358.9808349609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 359.00189208984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 358.8814392089844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 359.0618591308594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 358.9130859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 359.0185546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 358.9852600097656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 359.14373779296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 359.19158935546875
############ Running episode number: 963  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.7551574707031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 358.9316711425781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 358.92535400390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 358.8937683105469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 358.9252014160156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 359.04656982421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 358.94488525390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 359.0958251953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 358.96044921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 358.98699951171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 359.0193176269531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 359.0869140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 358.7720031738281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.7691650390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 359.0586242675781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 358.88616943359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 358.9985046386719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 359.0052490234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 358.87640380859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 358.9576110839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 358.8896789550781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 358.8990783691406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 358.91400146484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 358.91278076171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 358.73553466796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 359.12255859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 359.0261535644531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 358.75091552734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 359.01104736328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 358.9891662597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 358.8466796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 359.2149963378906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 359.1456604003906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 358.94122314453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 358.9043273925781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 358.9041442871094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 358.9576721191406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.89019775390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 358.8852844238281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 358.9156799316406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 359.05145263671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 358.82708740234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 358.8207702636719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 358.94781494140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 358.9917907714844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 358.9585266113281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 359.1015930175781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 358.8459777832031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 358.9425354003906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 359.0628356933594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 358.93487548828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 358.8183898925781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 358.9529113769531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 358.84197998046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 358.8545837402344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 359.02947998046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 358.8729553222656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 358.8154296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 358.9473571777344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 358.9376220703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 358.9513244628906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 358.9814453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 358.9728698730469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 359.0837707519531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 358.966796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 358.78704833984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 358.9585876464844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 358.9988098144531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 359.001220703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 359.01025390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 358.95977783203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 358.9825744628906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 358.8580627441406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 358.9248962402344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 359.064208984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 358.810302734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 358.97137451171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 359.098876953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 359.0665588378906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 358.6856994628906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 359.17626953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 359.08782958984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 358.8467102050781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 359.2470703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 358.82427978515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.85406494140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 358.93670654296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 359.2306823730469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 359.07623291015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 358.9212951660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 359.00933837890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 358.7490539550781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 359.01220703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 359.01776123046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 358.9166564941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 358.9320068359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 359.0968933105469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 358.79345703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 358.8786926269531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 359.0313720703125
############ Running episode number: 964  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.95098876953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 359.0248107910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 359.00677490234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 358.98638916015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 359.1968688964844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 359.02386474609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 358.86029052734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 358.9949035644531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 358.87567138671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 359.0175476074219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 358.9455261230469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 359.0058898925781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 359.09625244140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 359.09722900390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 358.8529052734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 358.8096008300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 359.1244812011719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 358.7737121582031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 358.9219055175781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 359.0504455566406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 359.0557556152344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 358.7839050292969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 359.14874267578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 358.94482421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 358.9299621582031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 358.97509765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 359.1695251464844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 359.1007385253906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 359.0019836425781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 358.8208312988281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 359.00225830078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 359.116943359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 358.9512939453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 358.83807373046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 358.9120178222656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 359.11151123046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 358.8528137207031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 359.1326599121094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 359.02471923828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 359.0860900878906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 359.09698486328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 359.09722900390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 359.0703430175781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 359.06158447265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 359.1556396484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 359.0090637207031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 359.0043640136719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 358.95098876953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 358.8219909667969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 359.10394287109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 358.9834899902344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 358.88037109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 359.1844177246094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 358.9867858886719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 359.0941467285156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 359.11279296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 358.87603759765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 359.0050354003906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 359.0805358886719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 359.0064392089844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 358.9907531738281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 358.9661560058594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 358.9514465332031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 359.01812744140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 358.7752380371094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 358.8796081542969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 358.9853210449219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 358.8858947753906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 358.8993835449219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 359.26531982421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 359.03594970703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 358.67779541015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 359.09375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 358.8942565917969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 359.0181884765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 359.2212219238281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 358.89202880859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 359.0626220703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.8680419921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 358.9681091308594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 359.0940856933594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 358.6978759765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 358.9471435546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 358.97955322265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 359.16357421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.95916748046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 358.84820556640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.8872375488281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 359.2066650390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 358.8465576171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 358.8551330566406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 358.7461242675781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 359.0818786621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 359.22821044921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 359.00146484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 358.928466796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 358.7902526855469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 358.8656311035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 359.0152587890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 359.1872253417969
############ Running episode number: 965  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 359.01934814453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 358.77880859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 359.0136413574219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 358.9574890136719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 358.95635986328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 359.0081787109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 359.0221252441406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 358.94195556640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 358.9541931152344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 358.83172607421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 359.09136962890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 358.9278869628906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 359.01959228515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 359.0552978515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 358.8771667480469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 359.073486328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 359.0030517578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 358.9263916015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 358.958251953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 358.8863830566406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 358.99908447265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 358.86358642578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 358.9493103027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 359.1377868652344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 358.84759521484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 359.0885925292969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 358.955810546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 359.1019287109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 358.79559326171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 358.8577575683594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 358.9832458496094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 358.98443603515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 358.9827880859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 359.1207275390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 358.92144775390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 358.8472595214844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 358.9189758300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 359.1838684082031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 358.9244079589844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 359.096435546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 358.7876281738281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 359.02734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 359.0436706542969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 358.90277099609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 359.1861267089844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 358.95843505859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 358.91778564453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 358.6924743652344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 358.8918762207031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 359.0072937011719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 359.07501220703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 358.90869140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 359.01458740234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 358.9649353027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 359.0445861816406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 358.8674011230469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 359.13690185546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 359.1063537597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 358.96343994140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 359.2135314941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 358.9793701171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 359.0906066894531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 359.14898681640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 358.8645324707031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 359.0182189941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 358.90447998046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 358.8711242675781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 358.9600524902344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 358.9161071777344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 359.01104736328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 358.81964111328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 358.9447326660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 358.8301086425781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 359.27734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 358.97418212890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 359.0999450683594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 358.9280090332031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 359.13824462890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.76568603515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 358.96905517578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 358.81085205078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 358.890380859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 358.85662841796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 358.8623962402344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 358.9176940917969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 359.1266784667969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 359.1788024902344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 359.08685302734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.9842529296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 358.8862609863281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 358.9315185546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 359.01959228515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 358.9541015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 358.9067077636719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 358.9111328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 358.8321838378906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 358.9337158203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 359.0696716308594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 358.7993469238281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 359.0224609375
############ Running episode number: 966  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.9092712402344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 358.99566650390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 358.9669494628906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 358.8528137207031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 359.0362548828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 359.062744140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 358.9932556152344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 358.8094787597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 359.00653076171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 358.93017578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 358.9876403808594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 358.9095153808594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 358.9983215332031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.8660583496094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 358.8074951171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 358.845703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 359.1313171386719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 359.15679931640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 359.0935363769531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 359.025146484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 359.0071105957031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 358.9464416503906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 359.0036315917969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 358.84869384765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 358.8338623046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 358.92828369140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 358.7206115722656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 358.7644958496094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 359.1312255859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 358.8987731933594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 358.8377990722656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 358.7691650390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 359.008056640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 358.764892578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 359.0291442871094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 358.804931640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 358.9000244140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.92840576171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 359.1941833496094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 358.7779541015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 359.0096740722656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 358.976806640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 358.7508850097656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 359.2418518066406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 359.0587158203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 359.0852966308594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 358.96044921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 358.939208984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 358.9118957519531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 359.0187072753906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 358.9002380371094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 358.8468322753906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 358.7605895996094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 358.84490966796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 359.0677795410156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 359.0477294921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 358.9445495605469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 359.1647033691406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 359.11328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 358.8013000488281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 358.919921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 358.6705322265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 358.61944580078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 359.0191345214844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 359.000732421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 358.97991943359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 359.1075439453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 359.1359558105469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 359.02166748046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 358.89947509765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 358.81591796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 358.8512878417969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 358.8697509765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 358.9717712402344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 358.9344177246094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 359.1341247558594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 359.1315612792969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 359.0321960449219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.8186950683594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 358.94952392578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 358.8350524902344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 359.1819763183594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 359.0615539550781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 359.1839904785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 359.0706787109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.83734130859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 358.9336853027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.81793212890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 359.0644836425781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 358.85638427734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 358.98223876953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 358.7890319824219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 358.8737487792969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 359.0366516113281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 358.6553039550781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 359.06842041015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 358.9495849609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 359.00921630859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 359.0184631347656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 358.99444580078125
############ Running episode number: 967  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.8802490234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 358.8399963378906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 358.9670104980469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 358.7811279296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 358.87091064453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 358.9331359863281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 359.03662109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 358.8224182128906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 359.15576171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 358.8625183105469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 358.9167175292969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 358.9613342285156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 358.73394775390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.9765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 359.25201416015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 358.9491882324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 358.7926025390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 358.7783203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 359.2093200683594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 358.99224853515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 358.926025390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 359.11529541015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 358.9659729003906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 358.802490234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 359.1857604980469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 358.88482666015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 358.8258056640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 359.03863525390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 358.92596435546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 358.8019104003906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 358.9018859863281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 358.8875427246094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 358.8502197265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 358.83416748046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 359.116943359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 358.8739318847656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 358.8333740234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.89874267578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 358.9972229003906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 358.92364501953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 358.9017028808594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 358.9151306152344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 359.0519714355469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 358.910888671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 359.0330810546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 359.0350036621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 358.8669128417969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 358.8134460449219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 358.8661193847656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 359.00726318359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 358.7762145996094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 359.1011962890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 358.9993896484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 359.0846862792969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 358.86468505859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 359.17523193359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 359.0592346191406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 358.94732666015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 359.0591125488281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 358.8030090332031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 358.99176025390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 358.9911804199219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 358.8636169433594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 358.88311767578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 358.9371337890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 358.9651184082031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 358.92987060546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 359.11181640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 358.9044189453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 359.0246887207031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 358.7920227050781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 358.96978759765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 359.0396728515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 358.84326171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 358.89312744140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 359.0221862792969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 358.8533630371094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 359.2099609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 359.0780334472656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 359.024169921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 358.7618408203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 359.16448974609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 359.0451965332031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 359.1927795410156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 358.8609313964844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.8783264160156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 358.9630126953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.9051513671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 359.189208984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 358.9001159667969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 358.92535400390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 358.6647033691406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 358.8123474121094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 358.87322998046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 358.8292236328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 358.9939880371094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 358.9554138183594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 358.91552734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 358.99896240234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 358.9541015625
############ Running episode number: 968  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.94512939453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 358.854248046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 358.87994384765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 359.1128845214844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 358.8983154296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 359.01007080078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 359.2591247558594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 358.95263671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 359.019287109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 358.93212890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 358.7236633300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 359.2640686035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 358.78619384765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 359.021728515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 359.001220703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 358.8863220214844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 359.03802490234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 358.8887634277344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 358.92401123046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 359.00714111328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 358.9837646484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 358.89031982421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 358.9644470214844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 359.2375183105469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 359.0382385253906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 358.9990234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 359.123046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 358.9012756347656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 359.0726013183594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 358.89923095703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 358.9994201660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 358.7229309082031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 358.84991455078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 359.07916259765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 359.0358581542969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 358.95526123046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 358.88720703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.94708251953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 358.79742431640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 359.03948974609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 359.0794982910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 358.91839599609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 358.7847595214844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 359.0180358886719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 359.2236022949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 358.94793701171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 359.0011901855469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 358.82550048828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 358.916015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 358.8652648925781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 358.8218994140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 359.01568603515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 358.9547119140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 359.04669189453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 358.9678649902344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 358.9814147949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 358.8492431640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 358.9659423828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 359.03076171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 358.91546630859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 358.8409423828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 359.11419677734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 359.00518798828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 358.9122314453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 358.920654296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 359.0885009765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 358.8402099609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 359.1000671386719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 358.9112854003906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 358.6407470703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 359.0461120605469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 358.95477294921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 359.0206604003906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 358.6054992675781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 359.0252380371094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 359.25262451171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 358.8472595214844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 358.9580993652344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 359.0315856933594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 359.0410461425781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 358.8594970703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 358.8952941894531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 359.18487548828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 358.7123107910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 358.9239501953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 359.0651550292969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 359.1061706542969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.70404052734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.88275146484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 358.6434020996094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 358.93243408203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 359.08038330078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 358.9054870605469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 358.83221435546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 358.8669128417969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 358.914306640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 358.95416259765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 359.0709228515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 358.88519287109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 358.70135498046875
############ Running episode number: 969  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.9751281738281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 359.06597900390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 359.0517883300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 358.91351318359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 358.8771667480469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 358.85443115234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 358.9076843261719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 358.9057312011719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 359.0362548828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 358.9244384765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 358.75836181640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 358.9091796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 359.20751953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 359.0865173339844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 358.8157043457031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 359.073974609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 358.9462585449219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 359.0998229980469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 358.805908203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 358.7408142089844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 358.661376953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 358.949951171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 358.97607421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 358.8877258300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 359.0205993652344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 359.0487365722656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 358.9078369140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 359.0141296386719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 358.8555603027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 358.944580078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 359.00885009765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 359.05023193359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 358.9642028808594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 359.1247253417969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 358.7999572753906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 359.0577697753906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 359.07769775390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.9132995605469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 358.93658447265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 358.8883056640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 359.09735107421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 358.9676818847656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 359.16766357421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 358.7355041503906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 359.08892822265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 359.0098876953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 358.9146728515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 358.7908020019531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 358.9866943359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 358.8558654785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 358.9095458984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 359.0195617675781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 359.2839050292969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 358.950927734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 359.107666015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 359.0250244140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 358.9215087890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 359.0907897949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 358.84246826171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 359.07098388671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 359.0810241699219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 359.1877746582031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 358.7833557128906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 358.9157409667969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 358.9360046386719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 359.12261962890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 358.9456787109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 358.95135498046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 359.0647888183594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 359.0492248535156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 358.9364929199219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 358.97589111328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 359.1436462402344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 358.80450439453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 358.8249816894531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 359.009765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 358.9704284667969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 358.8056640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.9046325683594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 358.75750732421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 358.7995910644531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 358.9937438964844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 358.84100341796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 358.9638671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 359.01458740234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.9784240722656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 359.02777099609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.9237060546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.87921142578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 358.97674560546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 358.8883361816406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 359.01409912109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 359.2699890136719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 359.0133361816406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 358.5694580078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 358.9567565917969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 358.8802490234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 359.1695861816406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 359.0498046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 359.0809631347656
############ Running episode number: 970  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.9161376953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 358.75244140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 358.8053894042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 359.15362548828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 358.8437805175781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 358.8221740722656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 358.8857421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 358.6877136230469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 358.91986083984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 358.8603820800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 358.8852844238281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 359.00836181640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 358.8147277832031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 359.0857238769531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 358.71685791015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 359.1983642578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 359.1330261230469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 358.8212890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 358.9018859863281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 359.0186462402344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 358.6087951660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 359.0053405761719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 359.16461181640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 358.67401123046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 358.9452819824219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 359.07159423828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 358.82769775390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 358.8966979980469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 358.8905334472656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 359.013671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 358.76348876953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 359.01385498046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 359.05377197265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 358.8272399902344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 358.7709045410156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 359.0766906738281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 358.9820251464844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.7342529296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 358.7928161621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 358.9781799316406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 358.93310546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 358.9496154785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 358.7814636230469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 358.9400634765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 359.0721130371094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 358.91094970703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 358.91973876953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 358.86920166015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 359.0400085449219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 358.8720397949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 358.77880859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 358.9825134277344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 358.96923828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 358.8807373046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 358.92034912109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 358.63623046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 358.94219970703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 358.9758605957031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 358.90576171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 358.8871154785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 359.07745361328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 358.8935241699219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 358.8346862792969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 359.08612060546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 359.1375732421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 359.0928955078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 358.9106140136719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 359.020751953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 358.845703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 359.0539855957031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 358.9479064941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 358.895263671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 358.767822265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 358.6241760253906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 358.9881896972656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 359.1628723144531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 359.0916442871094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 358.8499755859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 359.0138244628906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 358.8131103515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 358.84661865234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 359.0274353027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 359.061767578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 359.17864990234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 358.97216796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.962890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 358.9295959472656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.7728576660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.8327331542969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 358.91790771484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 359.01025390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 358.94586181640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 358.8937072753906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 358.85723876953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 359.06854248046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 358.88763427734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 359.08660888671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 358.89990234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 358.9148254394531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 359.0999450683594
############ Running episode number: 971  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.8160705566406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 358.98193359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 358.9757080078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 359.1106262207031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 358.9910888671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 359.0557556152344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 358.6429748535156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 358.92852783203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 358.92205810546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 358.60845947265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 358.8423767089844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 359.0763854980469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 358.8340759277344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.76275634765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 359.0650329589844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 359.2510070800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 358.9549865722656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 359.14227294921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 358.75201416015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 358.8491516113281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 358.9836120605469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 358.94775390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 358.9486999511719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 359.0160827636719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 358.8209533691406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 359.0829772949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 358.8986511230469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 359.12213134765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 358.9998779296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 358.9848937988281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 358.9813232421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 359.0630798339844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 358.9084777832031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 358.9477844238281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 358.7936096191406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 359.07318115234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 358.983642578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.8581848144531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 358.93707275390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 359.1031799316406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 358.8655700683594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 358.83660888671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 358.9730529785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 358.9383239746094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 358.937255859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 359.2639465332031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 358.80511474609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 358.9888610839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 358.9951171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 358.9442443847656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 358.6781005859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 358.831298828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 359.0654296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 359.1167907714844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 358.9571228027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 358.86541748046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 358.8121337890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 358.7687072753906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 358.9732971191406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 358.8080749511719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 359.055419921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 358.8489990234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 358.90203857421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 359.0946350097656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 358.7867736816406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 358.8367004394531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 358.9743347167969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 359.0462646484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 358.8614196777344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 358.7804870605469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 358.9105529785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 358.9419250488281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 358.8794860839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 358.7550354003906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 358.7355651855469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 358.86126708984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 359.0497131347656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 358.99365234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.9605407714844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 358.94696044921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 359.2099304199219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 359.012939453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 358.9152526855469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 358.7127685546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 358.76824951171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 359.0633239746094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 359.03662109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.59326171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.9139709472656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 358.9932861328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 358.8874206542969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 359.1631164550781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 358.98626708984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 359.0279235839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 358.7500305175781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 358.8661804199219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 358.8706970214844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 359.0826721191406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 358.9920654296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 358.8365478515625
############ Running episode number: 972  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.969482421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 358.8520812988281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 358.8155517578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 359.0007629394531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 358.9504699707031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 358.7301330566406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 358.7745056152344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 359.09808349609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 358.920166015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 358.7860412597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 358.91064453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 359.0185546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 358.97552490234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.7862243652344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 359.1341247558594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 358.8930358886719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 359.0986022949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 358.8199462890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 358.9685363769531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 358.8732604980469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 359.0973815917969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 358.96624755859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 359.02789306640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 359.0110778808594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 358.7167663574219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 358.8485412597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 358.9705505371094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 358.8503112792969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 359.1619567871094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 358.9051818847656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 358.88031005859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 359.04742431640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 359.0441589355469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 358.9298400878906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 358.9053955078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 358.886962890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 359.0147399902344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 359.1563415527344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 358.87042236328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 358.9656066894531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 358.9384765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 358.99273681640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 358.8207092285156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 359.04693603515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 359.1301574707031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 358.9377746582031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 358.891845703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 358.8657531738281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 359.0168151855469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 358.86907958984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 359.016357421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 359.0798645019531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 358.96820068359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 358.9129333496094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 358.8847961425781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 358.85516357421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 358.97698974609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 358.89642333984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 359.1433410644531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 358.94775390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 358.8978271484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 359.0530700683594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 359.099853515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 358.8564147949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 358.8760070800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 359.1685485839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 359.14434814453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 358.9439392089844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 358.9153747558594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 358.81890869140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 358.96002197265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 358.9637145996094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 359.148681640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 358.8245849609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 358.90985107421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 359.04669189453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 358.9171142578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 358.9650573730469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.976806640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 358.9862060546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 358.9299621582031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 358.7948303222656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 358.88531494140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 358.7839660644531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 359.0219421386719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 359.18487548828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 359.14154052734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.70013427734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.88946533203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 358.95452880859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 358.81878662109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 359.0273132324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 359.104248046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 358.9224853515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 358.8092956542969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 358.97625732421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 358.7936706542969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 359.1803283691406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 358.97564697265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 358.8597412109375
############ Running episode number: 973  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 359.0150451660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 358.8840637207031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 358.9524841308594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 358.9771728515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 358.96331787109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 359.04986572265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 358.88140869140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 358.94415283203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 359.1786804199219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 358.9056396484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 359.0775451660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 359.07708740234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 358.91015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 359.1263427734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 358.7412414550781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 358.9046630859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 359.1744079589844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 358.9644470214844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 359.0989685058594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 358.9811706542969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 358.8085632324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 358.8513488769531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 358.9851379394531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 358.9284362792969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 358.8763122558594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 358.8746643066406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 358.9995422363281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 359.0013732910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 358.9652404785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 358.9065856933594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 358.9658203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 359.0890808105469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 358.9251403808594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 358.8396911621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 358.86151123046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 358.9501647949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 358.5452575683594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 359.0051574707031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 359.0256042480469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 358.85516357421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 358.86474609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 359.0213317871094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 358.90020751953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 358.9338073730469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 358.9011535644531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 358.8681945800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 358.9770812988281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 359.0162353515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 358.82513427734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 358.77716064453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 358.9344177246094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 359.0685119628906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 359.1390686035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 358.91766357421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 358.9439697265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 358.9333190917969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 358.7668151855469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 358.74554443359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 359.08697509765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 359.0989990234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 358.83984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 358.8742980957031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 358.9324645996094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 359.19366455078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 358.71624755859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 358.7796325683594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 359.0372009277344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 359.1711120605469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 359.01934814453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 358.8666076660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 359.0271911621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 359.02117919921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 358.91156005859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 358.98114013671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 359.0854187011719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 358.9706726074219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 359.106689453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 358.99627685546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.79754638671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 358.9416198730469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 358.7706604003906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 359.00823974609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 359.0566711425781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 358.9013671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 358.9007568359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 359.119140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 359.13818359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.9870910644531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 359.0220031738281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 358.9664306640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 359.1040954589844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 358.9486389160156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 358.9973449707031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 358.8417053222656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 358.8994445800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 358.6371154785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 359.07427978515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 358.8076171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 358.9499816894531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 358.9910583496094
############ Running episode number: 974  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.9719543457031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 359.02899169921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 358.9973449707031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 358.8660583496094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 359.2190246582031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 358.71331787109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 358.8176574707031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 358.936767578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 358.8126525878906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 358.82342529296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 358.9043273925781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 358.8971862792969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 358.93017578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.9918212890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 358.8296203613281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 358.8701477050781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 358.87353515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 358.9379577636719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 359.0194091796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 359.0746765136719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 359.004150390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 358.8033752441406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 359.0551452636719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 358.8951110839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 358.9829406738281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 359.0460205078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 358.9513854980469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 358.79876708984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 359.152587890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 358.8967590332031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 359.0693359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 358.93927001953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 358.98974609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 359.0257873535156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 359.0301513671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 359.1824035644531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 358.9824523925781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 359.0975036621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 358.7922668457031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 358.6218566894531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 358.7557678222656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 358.785888671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 358.8607177734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 358.8739013671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 358.9451599121094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 358.9490661621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 358.9375915527344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 359.2160339355469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 358.9081115722656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 359.00665283203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 359.2186279296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 358.9853820800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 358.8791809082031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 359.00238037109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 358.8939208984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 359.0853271484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 358.8958435058594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 358.8523864746094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 358.92401123046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 358.84600830078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 358.97650146484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 358.939208984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 358.8938293457031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 359.0191955566406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 358.80987548828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 359.09442138671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 358.9455871582031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 358.9784240722656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 359.0665283203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 358.8730163574219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 359.033935546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 358.83477783203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 359.05487060546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 358.7182922363281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 358.87115478515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 358.810546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 359.05438232421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 358.8469543457031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 359.0863037109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 359.0013122558594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 358.9934387207031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 359.0115661621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 358.916015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 358.882080078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 358.8577880859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.95611572265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 359.0770568847656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.99237060546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.91986083984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 359.1137390136719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 359.0382385253906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 358.9214172363281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 359.1363830566406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 358.9278259277344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 358.7364501953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 358.91619873046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 358.8466491699219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 359.0198974609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 359.0589904785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 359.01068115234375
############ Running episode number: 975  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.9032287597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 358.6767883300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 359.0830078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 359.0430603027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 359.14813232421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 359.08740234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 358.8220520019531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 358.8743591308594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 358.90869140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 358.9695129394531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 358.8985595703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 358.99029541015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 358.88287353515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 359.147216796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 358.9312744140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 359.1282043457031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 359.0108642578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 359.09637451171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 358.8470764160156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 358.9060974121094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 359.0252990722656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 359.11968994140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 359.01727294921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 358.8486328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 359.0120849609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 359.0021667480469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 359.06329345703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 359.0323486328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 358.7701110839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 359.0192565917969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 358.863525390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 359.009033203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 358.8034362792969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 359.0840148925781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 358.97607421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 358.9686279296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 358.84918212890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.90362548828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 358.5433349609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 358.8267517089844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 359.12237548828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 359.0882568359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 358.93267822265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 358.8891906738281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 358.99432373046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 358.7679443359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 359.0062561035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 358.9421081542969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 358.9564208984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 359.0426940917969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 358.93072509765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 358.81280517578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 359.2518615722656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 359.0111389160156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 359.02532958984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 358.8754577636719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 358.9970397949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 359.11553955078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 358.7176818847656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 358.9822082519531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 358.9275207519531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 358.7808837890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 358.9498596191406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 358.8844909667969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 358.8924255371094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 359.014892578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 359.07415771484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 359.0602722167969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 359.05841064453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 358.8489685058594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 358.7885437011719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 358.764404296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 358.9913024902344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 359.0876159667969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 358.87255859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 359.0177001953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 358.8004455566406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 358.97467041015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.94708251953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 358.8103942871094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 358.9956970214844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 359.0537109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 358.81463623046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 359.018310546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 359.0068054199219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 359.2587585449219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 358.96417236328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.8997802734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.94073486328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 358.80450439453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 359.0123596191406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 358.95587158203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 359.0953063964844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 358.8248596191406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 359.05120849609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 358.9674377441406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 358.8712463378906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 359.0799255371094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 358.9635009765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 358.7257080078125
############ Running episode number: 976  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.868896484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 359.0393981933594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 359.03961181640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 358.8988342285156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 358.9522705078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 358.99127197265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 359.134765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 358.977294921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 359.2290344238281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 358.7569885253906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 359.07489013671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 358.9337158203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 359.04541015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.7955017089844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 358.84039306640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 358.7512512207031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 358.8232727050781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 359.0578308105469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 358.8341064453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 358.9535827636719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 359.0302734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 358.96368408203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 359.0770263671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 359.0168762207031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 358.9150390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 358.8066711425781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 358.7574462890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 359.0348815917969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 358.9434509277344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 358.9042663574219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 358.9852600097656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 358.9813232421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 358.98541259765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 358.7374267578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 359.2577819824219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 359.007568359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 358.72894287109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 359.14434814453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 358.78192138671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 359.0787353515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 359.09625244140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 358.95672607421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 358.81414794921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 359.1634521484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 358.9951171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 359.02606201171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 359.1092224121094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 359.0535888671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 358.8681335449219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 359.1007080078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 358.9064636230469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 359.0516357421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 358.9020690917969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 359.01007080078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 358.963623046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 359.0129699707031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 358.8849792480469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 358.8699645996094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 359.0684509277344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 358.79644775390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 359.09136962890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 359.0309753417969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 359.0216369628906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 359.0262451171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 358.93988037109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 359.0785217285156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 358.9789123535156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 358.87005615234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 358.9129943847656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 358.81536865234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 358.826171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 358.9862976074219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 358.8188171386719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 358.99957275390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 359.0035705566406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 359.02703857421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 358.8591003417969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 358.7431640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 359.0235290527344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 358.9658508300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 358.9744567871094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 358.947265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 359.0724182128906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 358.8948059082031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 358.9553527832031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 359.01593017578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 358.7510986328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.9560241699219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 359.0006408691406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 358.80413818359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 358.9491271972656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 358.97515869140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 358.9595642089844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 358.63592529296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 358.763671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 358.8558654785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 359.06884765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 359.01910400390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 359.0059814453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 358.79852294921875
############ Running episode number: 977  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 359.0243225097656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 358.9223937988281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 358.88116455078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 359.0133972167969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 358.7836608886719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 358.869384765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 359.0673522949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 359.0340881347656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 358.85662841796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 359.0504455566406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 359.0197448730469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 358.8682556152344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 358.8761901855469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.8458557128906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 359.1551818847656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 358.92138671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 358.866943359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 358.9857177734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 358.9579162597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 359.1863098144531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 359.0063781738281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 358.91680908203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 359.0635070800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 358.9450988769531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 359.0006103515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 359.1644287109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 358.97357177734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 358.9136962890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 358.88287353515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 358.9414367675781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 358.90997314453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 358.74493408203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 358.6562805175781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 359.05206298828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 358.9879455566406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 358.61798095703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 359.0183410644531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 359.0223693847656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 359.04913330078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 358.69677734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 358.77349853515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 358.8212890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 358.97613525390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 358.84832763671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 358.9212341308594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 358.9532165527344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 359.174560546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 359.0706787109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 358.765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 359.04107666015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 359.2594299316406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 358.7425231933594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 358.91156005859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 358.9149169921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 358.9686584472656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 358.8720397949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 358.86236572265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 359.0982971191406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 358.9890441894531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 358.9034423828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 359.0664978027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 359.05938720703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 358.902099609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 358.8804016113281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 359.11688232421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 358.863525390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 359.0093994140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 359.05255126953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 358.93115234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 359.00653076171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 359.0160217285156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 358.8882751464844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 358.922119140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 358.7462158203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 358.9684143066406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 359.22564697265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 358.9988708496094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 359.04168701171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.795166015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 359.0260314941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 358.94866943359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 358.96527099609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 358.98431396484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 358.86553955078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 358.71832275390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.9781799316406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 359.2615051269531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.843017578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.9327697753906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 358.89654541015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 359.04913330078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 359.2207946777344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 358.9359436035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 358.8861999511719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 358.8664855957031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 358.8596496582031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 358.9482116699219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 358.86077880859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 358.82122802734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 358.9233703613281
############ Running episode number: 978  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.84234619140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 358.85174560546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 359.0196228027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 359.1313781738281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 358.93609619140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 358.9820861816406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 358.9338684082031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 358.8185729980469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 358.9011535644531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 358.97119140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 359.0646667480469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 358.9142150878906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 358.8238830566406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 359.0517578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 358.9897766113281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 358.95831298828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 358.9921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 358.8734130859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 359.1605529785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 358.88555908203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 359.02288818359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 359.067626953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 358.9856262207031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 359.1220397949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 359.12188720703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 359.23583984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 358.8926086425781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 358.8564147949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 358.8990478515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 358.94622802734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 358.662109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 359.13714599609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 358.9546813964844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 359.0383605957031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 358.7773742675781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 359.0350646972656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 358.7933349609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.8658447265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 358.9443359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 358.7823486328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 358.9017333984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 358.7566223144531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 358.9170837402344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 359.0126953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 359.04400634765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 358.9747619628906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 358.9649658203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 358.9367370605469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 359.2015075683594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 359.0831298828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 358.9761047363281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 358.97802734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 358.88140869140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 359.060791015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 359.0494384765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 358.8978576660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 358.96893310546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 358.7590637207031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 359.0597839355469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 359.07147216796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 358.964111328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 359.0672912597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 358.92919921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 358.88580322265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 358.79412841796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 358.9975280761719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 358.86822509765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 358.83221435546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 358.9322509765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 359.1857604980469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 359.08648681640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 359.048583984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 359.0035705566406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 359.1167297363281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 358.9404296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 358.82989501953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 358.99114990234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 358.9980773925781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.8199462890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 359.1059265136719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 358.8747253417969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 358.7513122558594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 359.1783752441406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 359.146728515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 358.9395446777344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.866455078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 358.9269714355469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 359.0065612792969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.8826904296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 358.9190979003906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 358.81610107421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 358.82647705078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 359.1123352050781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 359.0293884277344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 359.0379333496094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 359.00335693359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 358.813232421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 359.0688781738281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 359.0462951660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 358.98248291015625
############ Running episode number: 979  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.9178466796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 359.10675048828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 359.1514587402344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 358.9771728515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 358.8898010253906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 359.05804443359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 359.1087646484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 359.15789794921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 358.8824462890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 359.0140686035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 359.0618896484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 359.0020446777344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 358.9882507324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.7646179199219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 358.849365234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 358.8421325683594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 358.91680908203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 358.9084777832031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 359.0357360839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 359.1365051269531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 358.8605651855469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 359.20147705078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 358.9640197753906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 358.9693908691406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 359.0887145996094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 358.83074951171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 359.2351379394531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 359.1098937988281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 358.7705993652344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 359.0163269042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 359.1860046386719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 358.83843994140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 358.7429504394531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 358.9876708984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 358.84808349609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 359.03131103515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 358.96820068359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.885986328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 358.870361328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 359.0545654296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 358.919677734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 359.09918212890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 359.0047607421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 358.6161804199219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 359.0587463378906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 358.9330139160156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 358.8576965332031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 358.92694091796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 359.08770751953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 358.8854675292969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 358.7966003417969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 358.6833190917969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 358.9015808105469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 358.80499267578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 359.00787353515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 359.0091247558594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 358.84765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 359.1078796386719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 358.86114501953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 358.80218505859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 359.0252380371094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 359.02728271484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 359.0412902832031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 359.07354736328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 358.8635559082031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 359.0469665527344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 359.1089782714844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 358.8901672363281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 358.9694519042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 358.96917724609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 359.01116943359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 358.94427490234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 358.83447265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 358.9593505859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 359.1120300292969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 359.05181884765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 358.76470947265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 359.07470703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 359.0472106933594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 359.01080322265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 358.99359130859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 358.851318359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 359.1598815917969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 358.8885498046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 358.9485778808594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.80511474609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 358.987548828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.9101867675781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.97589111328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 358.7840270996094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 359.05078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 359.0752258300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 359.162109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 358.89111328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 358.9266052246094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 358.90655517578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 358.6943664550781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 358.7492370605469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 358.8822326660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 358.9681396484375
############ Running episode number: 980  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.9897766113281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 359.16845703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 359.17523193359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 358.5802307128906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 359.0312194824219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 358.7713928222656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 359.0778503417969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 359.0538635253906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 358.9093017578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 358.843505859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 359.0473937988281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 358.89105224609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 358.77783203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 359.0267028808594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 358.84136962890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 359.0523376464844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 359.1478576660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 358.802978515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 358.697509765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 359.0948181152344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 358.9666748046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 358.7828674316406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 358.9079284667969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 358.9259338378906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 358.8350524902344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 358.80328369140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 359.0640869140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 359.04217529296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 359.0831604003906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 359.08538818359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 359.06866455078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 359.0398864746094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 358.9169616699219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 359.1181640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 358.93896484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 359.0255126953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 358.903076171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.962890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 358.8729553222656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 358.6952209472656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 359.2060546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 358.9364929199219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 358.97637939453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 358.7236328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 358.9123229980469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 359.13385009765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 358.8897705078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 359.08197021484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 358.7623596191406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 358.87310791015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 359.08306884765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 359.0459289550781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 359.0583801269531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 358.8499450683594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 358.873291015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 358.8175354003906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 358.9055480957031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 358.94390869140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 358.91143798828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 358.94183349609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 358.95782470703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 358.8809509277344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 359.0606994628906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 359.1084289550781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 359.0595703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 359.0330810546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 359.12384033203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 359.0899353027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 359.0495300292969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 359.0394287109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 358.9551086425781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 358.9320373535156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 358.90728759765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 359.0871276855469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 359.1603698730469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 359.0081787109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 358.8857116699219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 359.09326171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.9033508300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 358.9870910644531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 358.9882507324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 359.0798034667969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 359.0543212890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 358.9976501464844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 358.8917236328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.9283447265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 358.5725402832031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.9103088378906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 359.2254638671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 359.0262145996094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 358.953369140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 358.91961669921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 359.0272216796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 359.07672119140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 359.0323181152344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 358.8721008300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 359.09747314453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 358.971435546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 359.07568359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 359.0887451171875
############ Running episode number: 981  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 359.1496276855469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 359.22125244140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 358.9182434082031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 358.80584716796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 359.02899169921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 358.92950439453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 358.8257141113281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 359.16241455078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 358.91619873046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 358.99542236328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 359.0018005371094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 359.1568908691406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 358.85076904296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 359.0544128417969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 358.7057800292969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 358.9461669921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 358.8253173828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 359.0195617675781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 358.81549072265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 358.88916015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 359.0393981933594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 358.8740539550781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 358.9809875488281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 358.8934020996094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 358.9643859863281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 358.98974609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 359.113525390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 358.8993835449219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 358.9265441894531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 359.03753662109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 359.0625305175781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 359.3224792480469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 358.9588623046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 358.9920959472656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 359.02447509765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 358.8229675292969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 358.8374328613281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.8958435058594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 358.9134521484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 358.915771484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 358.98785400390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 358.9461364746094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 359.2652893066406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 358.9488220214844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 358.8692932128906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 359.0794677734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 358.95465087890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 359.08807373046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 358.9029235839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 358.86077880859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 359.0611877441406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 358.80218505859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 358.95513916015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 359.10107421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 358.9450988769531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 359.1573791503906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 358.803955078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 358.9096374511719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 359.0172119140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 359.0216064453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 359.02313232421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 358.86614990234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 358.98297119140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 358.8333435058594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 359.0658874511719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 359.0306396484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 358.8356628417969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 358.87786865234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 358.953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 359.075927734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 358.8276062011719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 358.92999267578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 358.94915771484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 358.8853454589844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 358.89654541015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 358.86822509765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 358.8822326660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 358.8544006347656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 359.00128173828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 358.9067687988281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 359.077392578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 358.9412536621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 358.9137268066406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 358.8380432128906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 358.9994812011719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.9343566894531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 358.9404602050781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.73651123046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.94903564453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 359.0667724609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 358.8097229003906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 358.8540344238281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 359.1224060058594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 358.9658508300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 358.9791259765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 358.9540710449219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 358.84796142578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 358.9782409667969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 358.8496398925781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 358.8883361816406
############ Running episode number: 982  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 359.1071472167969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 359.01220703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 359.05072021484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 358.96881103515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 358.91925048828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 358.96728515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 358.88836669921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 358.9638977050781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 358.98944091796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 358.9588317871094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 358.9147033691406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 359.0404052734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 358.9429931640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.9476318359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 359.0250549316406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 359.0263366699219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 359.09454345703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 358.86309814453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 359.1101989746094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 358.9413757324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 359.01812744140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 359.02587890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 359.05206298828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 359.00390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 359.1380615234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 358.9559020996094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 359.0892639160156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 359.0631103515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 359.0508117675781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 358.9129638671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 358.87188720703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 358.7445068359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 358.9801940917969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 358.8582458496094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 359.1084289550781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 359.0770263671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 359.0760498046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 359.0624084472656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 358.9117736816406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 358.97430419921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 358.75579833984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 359.0903625488281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 359.00830078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 359.00048828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 359.28399658203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 358.9643859863281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 358.8714904785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 359.0389404296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 358.8421630859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 358.98736572265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 358.8920593261719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 359.050048828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 359.0614318847656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 358.83270263671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 359.0598449707031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 358.8269958496094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 358.87164306640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 358.93621826171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 358.84716796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 358.8645935058594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 359.0852966308594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 358.96490478515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 358.9018249511719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 359.13134765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 358.82342529296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 359.04302978515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 359.07373046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 358.94281005859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 359.10205078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 358.9938659667969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 358.9067077636719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 358.9968566894531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 359.0150451660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 358.9271545410156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 358.8617858886719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 358.93804931640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 358.6866455078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 358.9322814941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 359.0454406738281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 359.0688171386719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 358.9538879394531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 358.98907470703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 358.96392822265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 358.9786376953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 358.7829895019531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.9548645019531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 358.8446960449219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 359.11053466796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 359.0813903808594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 358.9098205566406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 358.9722595214844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 359.05914306640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 358.84991455078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 358.6965637207031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 358.943603515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 358.8743896484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 358.7879638671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 358.884521484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 359.1950988769531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 358.93994140625
############ Running episode number: 983  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.807373046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 359.0321350097656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 359.0038146972656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 358.9757995605469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 359.0054626464844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 358.9389953613281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 359.0704040527344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 358.8799133300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 358.92852783203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 359.0813293457031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 358.9618835449219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 358.88262939453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 358.8375549316406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.9488220214844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 358.7979736328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 359.01434326171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 358.7035217285156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 358.65423583984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 359.08001708984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 359.0411682128906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 359.0153503417969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 358.9248352050781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 358.8787841796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 358.8865966796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 359.0874328613281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 358.8054504394531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 359.1230773925781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 358.8364562988281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 359.01300048828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 359.1334533691406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 359.06427001953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 358.9526672363281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 359.07379150390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 358.72369384765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 358.94140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 359.06103515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 358.89410400390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.9514465332031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 358.5732421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 358.7418212890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 358.95294189453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 358.9337463378906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 358.95172119140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 358.8822326660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 359.0179748535156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 358.89019775390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 358.6789245605469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 358.66357421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 358.9451904296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 358.932861328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 358.9459533691406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 358.71270751953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 358.80877685546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 358.8125305175781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 359.10662841796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 358.9607849121094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 359.2239990234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 358.9111022949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 358.94219970703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 359.10882568359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 359.0350646972656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 358.6806640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 359.0402526855469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 359.1507568359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 358.9448547363281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 358.8082275390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 359.2224426269531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 358.9497985839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 358.7253112792969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 358.9473571777344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 358.7768249511719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 358.85638427734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 359.1566162109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 358.9061279296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 358.72186279296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 359.0132141113281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 358.79150390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 359.1798095703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.870361328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 358.994873046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 358.88812255859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 359.1554870605469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 359.07049560546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 358.9364013671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 358.9715576171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.97491455078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 358.89093017578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.894775390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 359.08013916015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 358.8681640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 359.2393493652344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 358.8956604003906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 359.2784423828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 358.8794250488281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 359.1318359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 358.8832092285156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 359.1148681640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 358.99371337890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 358.8988342285156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 358.9776916503906
############ Running episode number: 984  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 359.00115966796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 358.91265869140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 358.6025085449219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 359.0496520996094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 358.87677001953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 358.8353576660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 358.956298828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 358.93206787109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 358.86431884765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 358.98565673828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 358.7836608886719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 359.0887756347656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 358.78192138671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 359.09149169921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 359.09552001953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 359.0979309082031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 358.7165222167969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 358.8133239746094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 358.73480224609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 358.7854919433594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 358.9051208496094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 359.1217041015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 358.9345397949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 358.8441467285156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 358.91497802734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 359.08294677734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 358.8017578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 359.0279235839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 358.9527893066406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 358.7491760253906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 358.8564758300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 358.83892822265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 359.0385437011719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 359.1894836425781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 359.0230407714844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 358.821044921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 358.9739074707031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.9263610839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 359.0154724121094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 358.7553405761719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 358.8651123046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 358.8468322753906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 359.0517578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 359.0197448730469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 358.7005615234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 358.96966552734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 359.06854248046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 358.92010498046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 358.6783142089844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 358.9339294433594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 359.0222473144531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 358.8319091796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 358.9933776855469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 358.7934875488281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 359.012451171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 358.79595947265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 358.7975158691406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 358.9679870605469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 358.9146728515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 358.6946105957031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 359.00042724609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 358.9501037597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 359.101806640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 358.85723876953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 358.9036560058594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 358.7240295410156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 359.0428161621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 358.9750061035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 358.6163330078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 358.9228515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 358.99957275390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 358.91015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 358.828369140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 358.9409484863281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 358.8850402832031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 358.9408264160156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 358.7840270996094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 359.3413391113281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.99755859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 359.1083679199219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 359.0714416503906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 358.9996643066406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 358.8541259765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 359.0457763671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 359.1134033203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.93853759765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 358.9889221191406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 359.0810852050781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.9570007324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 358.9748840332031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 359.0166015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 358.8297119140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 358.7973937988281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 358.84930419921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 359.0279235839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 358.8941650390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 359.0760803222656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 359.0362243652344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 358.83795166015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 358.9491882324219
############ Running episode number: 985  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.93212890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 359.2300109863281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 359.0875244140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 358.9574279785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 359.06842041015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 358.7833251953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 358.89324951171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 358.84661865234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 358.9188232421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 359.1688537597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 358.91259765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 359.2722473144531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 359.05474853515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.93572998046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 358.860595703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 359.0718078613281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 359.17169189453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 359.12908935546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 358.7501525878906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 358.80841064453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 358.9430847167969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 358.8862609863281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 358.7996826171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 358.59747314453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 359.0140686035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 358.7370910644531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 359.199462890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 358.79827880859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 358.9122619628906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 359.1242370605469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 359.024169921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 358.84918212890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 359.039794921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 358.990478515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 358.91046142578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 358.99407958984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 358.8235778808594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.7133483886719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 358.85491943359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 358.72235107421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 358.8099365234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 358.9273376464844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 358.9095153808594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 359.1279602050781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 358.8974304199219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 359.11700439453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 358.84405517578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 359.0677490234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 359.0735778808594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 359.0055236816406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 358.8398742675781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 359.04803466796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 359.0249328613281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 358.99212646484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 358.9580383300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 358.9140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 358.8119812011719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 358.9075927734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 359.0335998535156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 359.06243896484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 358.89654541015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 359.2130126953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 358.9681701660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 358.90093994140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 358.7796936035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 358.8565368652344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 358.8775634765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 358.9311828613281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 358.7599182128906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 358.961181640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 358.6788330078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 358.75274658203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 359.0643005371094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 358.71142578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 358.8541564941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 358.91094970703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 358.9013366699219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 358.7847595214844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.95928955078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 358.9602355957031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 358.8196105957031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 358.9797058105469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 358.9577331542969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 358.9823303222656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 359.0858459472656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.8779296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 359.0904846191406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.9100341796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.7085266113281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 358.9322509765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 358.9071044921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 359.03985595703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 358.8499755859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 358.91375732421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 358.9291076660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 358.858154296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 358.8046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 358.6903381347656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 359.1058349609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 358.7348937988281
############ Running episode number: 986  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.837890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 359.0505065917969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 358.9235534667969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 358.90264892578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 359.1432189941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 358.76025390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 358.97125244140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 358.88934326171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 358.8214111328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 358.73248291015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 358.7677917480469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 358.7697448730469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 358.9715576171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.9437255859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 358.8623352050781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 358.876708984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 358.8799133300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 359.09613037109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 358.91461181640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 359.10870361328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 358.82427978515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 359.1112365722656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 358.8197326660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 358.8414306640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 359.10174560546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 358.8294982910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 358.9961242675781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 358.8460388183594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 359.055908203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 358.8050231933594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 358.7996520996094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 358.9617919921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 358.82977294921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 358.8511962890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 358.9582214355469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 359.0905456542969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 359.10125732421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 359.0298767089844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 359.0399169921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 358.9903869628906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 359.17041015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 358.8584899902344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 358.8599548339844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 359.0159912109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 358.916015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 359.15936279296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 358.90802001953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 359.0517883300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 359.2376708984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 358.7200622558594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 359.0118713378906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 358.91717529296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 359.0259704589844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 359.119873046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 359.0105895996094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 358.8129577636719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 359.1082458496094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 358.8879699707031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 358.9088439941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 358.9466552734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 358.9303894042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 358.9361877441406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 359.1192932128906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 358.8591003417969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 358.8810119628906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 359.0442199707031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 358.9055480957031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 358.7164001464844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 358.93560791015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 359.02679443359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 358.9925231933594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 358.9745788574219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 359.08026123046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 358.839111328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 358.80596923828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 358.8052978515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 358.6614685058594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 358.8615417480469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.91888427734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 358.8314514160156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 358.91510009765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 358.8318786621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 358.99200439453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 358.691162109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 358.6882629394531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 359.0347900390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 358.7958984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.95745849609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 359.0433349609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 358.85906982421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 358.87005615234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 358.90325927734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 358.9372253417969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 358.91802978515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 359.03070068359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 358.9228210449219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 358.7685241699219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 359.3100891113281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 358.9093017578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 358.9978332519531
############ Running episode number: 987  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.8700866699219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 358.7851257324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 358.94195556640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 358.9374084472656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 359.2123718261719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 358.83941650390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 359.13616943359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 358.9532775878906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 359.0124206542969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 359.18670654296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 358.8888854980469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 358.89605712890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 359.0755310058594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 359.0690002441406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 359.126708984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 358.887939453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 359.01153564453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 358.90118408203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 359.0561828613281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 358.77117919921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 359.08624267578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 358.80279541015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 358.8994140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 358.98309326171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 359.0209655761719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 358.7784423828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 358.88543701171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 358.8037109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 358.9396057128906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 359.0545654296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 359.05438232421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 359.0237121582031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 358.88555908203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 358.912353515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 358.8133544921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 359.1675720214844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 359.02630615234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.8161315917969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 358.9398498535156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 358.8793029785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 358.98199462890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 359.2128601074219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 358.8993835449219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 358.9339904785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 358.8134460449219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 358.9967041015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 358.97967529296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 358.8768615722656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 358.98638916015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 358.9586181640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 359.214599609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 358.9242248535156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 358.9731140136719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 358.84417724609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 359.13238525390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 358.8214416503906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 359.1198425292969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 358.9961242675781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 359.0499267578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 359.17205810546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 358.918701171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 358.9659729003906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 358.93463134765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 359.00665283203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 359.1175231933594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 358.7659912109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 358.9387512207031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 358.9792785644531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 358.8072509765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 359.0958557128906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 358.9288024902344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 359.08404541015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 359.0436706542969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 359.0143737792969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 358.8945007324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 359.1157531738281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 358.7217102050781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 358.8504638671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.9511413574219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 359.10955810546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 358.7937316894531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 359.05712890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 358.8176574707031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 358.8252868652344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 359.0514221191406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.9521179199219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 359.0100402832031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 359.1469421386719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.92926025390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 358.908935546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 358.99822998046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 358.9753723144531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 358.9586181640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 358.7690124511719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 358.8133544921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 358.9990234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 358.8246765136719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 358.8399658203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 358.96441650390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 358.87066650390625
############ Running episode number: 988  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.8727722167969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 358.8186340332031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 359.0694885253906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 358.9538269042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 359.040283203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 358.85333251953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 359.09637451171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 358.9790344238281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 358.914794921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 358.884033203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 358.9731750488281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 359.1630859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 359.10662841796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.95245361328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 358.8916931152344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 359.1319580078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 358.9775085449219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 358.910400390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 358.7256774902344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 358.90283203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 358.76385498046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 359.2615661621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 358.8838806152344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 358.9358215332031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 358.93377685546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 358.75775146484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 358.9304504394531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 359.0580749511719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 359.0458984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 358.8386535644531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 358.8288879394531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 359.0068359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 358.7786865234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 359.10626220703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 358.82830810546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 358.76123046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 359.022216796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 359.0197448730469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 359.000244140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 359.08197021484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 358.9012145996094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 359.0799255371094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 358.9651794433594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 359.0116271972656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 358.7920837402344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 358.8109130859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 358.7979431152344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 358.9765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 359.0754699707031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 358.7972717285156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 359.0425720214844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 358.91558837890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 359.0313415527344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 359.0246276855469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 358.8832092285156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 359.04107666015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 359.1049499511719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 358.8969421386719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 358.8373718261719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 359.0976257324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 358.8954772949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 358.74932861328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 358.8746032714844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 359.0473937988281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 358.9571228027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 358.9781494140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 358.9884948730469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 358.8433532714844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 358.81597900390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 359.19525146484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 358.9564514160156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 358.87786865234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 359.0584716796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 358.8834228515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 359.0735168457031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 359.0501708984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 358.8929443359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 359.0451354980469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.8367004394531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 358.8638610839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 358.9810791015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 358.9786682128906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 358.9045104980469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 358.939697265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 358.83233642578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 359.13470458984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 359.22088623046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.9359130859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.6990661621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 358.9140319824219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 359.089111328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 358.6541748046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 359.1566467285156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 358.8459167480469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 359.0622253417969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 359.04266357421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 358.9842529296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 359.0245666503906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 358.88623046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 359.2073974609375
############ Running episode number: 989  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.91802978515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 358.9035949707031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 359.0830993652344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 359.1317443847656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 358.7662048339844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 358.9067077636719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 358.643798828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 359.1100158691406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 359.08258056640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 359.1440734863281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 358.8910217285156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 359.0614013671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 359.1560974121094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.9012145996094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 359.03021240234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 358.9072265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 358.9367370605469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 359.0662841796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 358.8520812988281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 359.08624267578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 359.0351867675781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 358.92254638671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 358.7969665527344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 358.88507080078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 359.0495300292969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 358.90460205078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 358.8583984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 359.055908203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 358.9837646484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 359.15826416015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 358.98907470703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 358.99072265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 359.00799560546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 359.0003662109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 358.9634704589844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 358.9520568847656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 359.1268615722656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 359.1601867675781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 359.141845703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 358.9912109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 358.8977966308594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 359.0635070800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 359.0010986328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 358.9166259765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 359.06109619140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 358.9154357910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 358.8573303222656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 359.14935302734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 358.9295654296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 358.92376708984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 358.8786315917969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 358.7568359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 358.9139709472656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 358.8671569824219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 358.8154602050781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 358.8430480957031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 358.94610595703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 358.7910461425781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 358.8401794433594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 359.04559326171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 359.1449890136719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 359.0244445800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 359.0582275390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 358.944091796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 358.900390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 359.0548095703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 358.7388000488281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 358.756103515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 359.215576171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 358.9793395996094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 358.9447326660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 358.723388671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 358.98828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 359.1493835449219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 358.96246337890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 358.6353759765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 358.8115234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 358.73388671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 359.20294189453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 358.97796630859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 358.95281982421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 358.9818420410156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 358.7601013183594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 358.9637451171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 358.85760498046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 359.19219970703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 359.04315185546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 359.1174621582031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.9026794433594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 358.810546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 358.861083984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 358.9336853027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 358.88128662109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 358.958740234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 359.23358154296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 359.1373291015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 358.9053649902344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 358.9033508300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 359.0874328613281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 359.0658874511719
############ Running episode number: 990  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.95587158203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 358.9692687988281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 358.8759765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 358.9519348144531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 359.1769104003906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 359.02264404296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 358.6860656738281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 359.09991455078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 359.1501159667969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 358.8825988769531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 358.9913024902344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 359.1365661621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 358.96563720703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 359.091796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 358.8368225097656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 358.7500915527344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 359.1354064941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 359.1725769042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 359.2294616699219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 359.1142272949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 358.97760009765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 358.8263244628906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 358.8341979980469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 358.7943115234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 359.0781555175781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 358.8605651855469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 359.1549072265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 359.08184814453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 359.1540222167969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 359.0411682128906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 359.09576416015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 358.89678955078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 358.87933349609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 358.8798522949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 358.9335632324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 358.9822692871094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 358.9994812011719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 359.08636474609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 358.9142150878906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 358.8828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 358.94873046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 359.0964660644531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 358.7835693359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 358.7005310058594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 358.7969970703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 359.0821533203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 358.69122314453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 359.08221435546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 359.0885925292969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 358.77581787109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 358.9754333496094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 358.8981628417969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 358.888427734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 358.8800964355469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 358.9056396484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 358.8395690917969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 358.9230041503906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 358.9580078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 358.9489440917969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 359.03424072265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 359.0162353515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 359.2347717285156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 358.83477783203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 358.9093322753906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 359.0473937988281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 358.8224792480469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 358.732666015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 359.0425109863281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 358.87493896484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 359.08050537109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 358.7976989746094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 359.1369934082031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 359.111572265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 358.7786560058594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 358.8897399902344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 358.9718322753906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 358.8616943359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 359.18023681640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.9549255371094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 359.0400695800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 358.80877685546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 359.2553405761719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 358.9431457519531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 359.1623229980469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 358.8413391113281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.8685302734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 359.02923583984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.6309509277344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 359.01605224609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 358.96728515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 359.0009460449219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 358.86676025390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 358.9525451660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 358.81500244140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 358.9051208496094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 359.1248779296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 359.0997009277344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 359.0903625488281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 358.7899169921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 359.1260070800781
############ Running episode number: 991  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 359.0507507324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 358.9173889160156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 359.00372314453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 358.7727966308594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 359.0752868652344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 358.9591064453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 358.9517822265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 359.1092834472656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 359.0233154296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 359.001220703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 358.790771484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 358.87396240234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 358.8727111816406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.830322265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 358.90374755859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 358.5739440917969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 358.93695068359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 359.08026123046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 359.0310974121094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 358.978759765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 359.1574401855469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 358.87542724609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 358.8582763671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 358.7655944824219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 359.10662841796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 358.9472961425781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 359.0773620605469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 358.98583984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 358.96832275390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 359.1370544433594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 358.9316711425781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 359.11541748046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 358.7863464355469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 358.9800109863281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 358.9649353027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 358.8611755371094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 359.2187194824219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 359.02984619140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 359.079833984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 359.0028076171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 358.9647216796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 358.8839416503906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 358.7266845703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 358.7979736328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 358.9140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 358.67205810546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 358.817626953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 358.7899475097656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 358.93182373046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 358.7877502441406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 359.0848693847656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 358.94305419921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 358.98565673828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 358.9237060546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 359.0083312988281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 358.9158020019531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 359.0774230957031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 358.91802978515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 359.16107177734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 359.0166320800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 359.0469970703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 358.81622314453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 359.0162658691406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 358.9154357910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 359.2359313964844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 358.87255859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 358.8156433105469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 358.9869689941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 359.0484924316406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 359.1222839355469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 359.0356750488281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 358.7486572265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 358.93463134765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 359.0287780761719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 358.9236145019531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 359.1121826171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 359.2240905761719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 358.9003601074219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 359.09613037109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 359.0464172363281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 359.1002502441406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 359.07745361328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 358.9414978027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 358.8537292480469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 358.74017333984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 359.0028991699219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 359.0491027832031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.984619140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.9377746582031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 358.9109191894531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 359.1132507324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 359.1233825683594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 359.043701171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 358.8433837890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 358.9719543457031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 358.80694580078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 359.06341552734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 358.9632568359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 359.07086181640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 358.94073486328125
############ Running episode number: 992  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.8807373046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 358.874267578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 358.95281982421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 359.029541015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 358.9881591796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 359.1600341796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 359.1033020019531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 358.9103698730469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 359.1460876464844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 358.7585754394531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 359.1685485839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 358.9730529785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 359.0232238769531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.9345397949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 358.640380859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 358.8752746582031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 359.07318115234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 358.70184326171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 359.12884521484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 358.9898376464844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 359.04095458984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 359.0696105957031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 358.9010925292969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 358.9087829589844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 358.87042236328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 358.991943359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 359.0346984863281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 359.0909423828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 358.8905029296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 359.0442810058594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 359.14874267578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 359.1424560546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 358.87225341796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 359.231201171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 359.05401611328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 358.93414306640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 358.7313537597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.9391174316406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 358.9096984863281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 358.86773681640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 358.96624755859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 358.72998046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 358.8250732421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 358.973876953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 358.7731628417969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 359.1124572753906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 358.91839599609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 358.90576171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 358.9232177734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 358.7944641113281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 358.7979736328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 358.9278259277344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 359.0097961425781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 358.9295349121094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 359.1062316894531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 358.984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 359.05572509765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 359.11090087890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 358.8499755859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 358.90826416015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 358.9866943359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 359.1116638183594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 358.9040832519531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 359.1173095703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 358.8976745605469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 359.0909729003906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 358.8985595703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 359.0740661621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 359.16015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 359.0028076171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 359.086181640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 358.8475646972656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 358.798095703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 358.8517761230469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 358.892333984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 359.0635070800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 358.8929443359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 358.70330810546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.97314453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 358.7718200683594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 358.96917724609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 358.926025390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 358.994873046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 358.9747009277344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 359.041748046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.9498596191406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 358.857421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.8758239746094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 359.1407470703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 359.0699462890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 358.91607666015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 358.8786926269531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 358.8338317871094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 359.0634765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 358.96124267578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 358.9052734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 359.00457763671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 359.08123779296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 358.9413146972656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 358.9527587890625
############ Running episode number: 993  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.94342041015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 358.86724853515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 358.9701232910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 358.9961853027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 358.76824951171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 358.9005126953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 359.0538330078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 359.04052734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 359.0054626464844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 358.9190979003906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 359.1156311035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 358.924072265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 358.9389343261719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.7588195800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 359.0059509277344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 359.05670166015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 358.9461975097656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 358.94921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 359.04266357421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 358.7679138183594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 358.6993408203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 358.8253173828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 358.7650451660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 359.01678466796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 358.8636169433594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 358.9746398925781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 358.6965637207031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 359.0527038574219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 359.1385803222656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 358.8984069824219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 358.8666687011719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 358.9684143066406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 358.8888854980469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 359.1446838378906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 358.9941711425781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 358.98199462890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 359.0350646972656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.792236328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 358.86614990234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 358.7904968261719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 358.9248046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 359.2124938964844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 358.86883544921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 358.8945007324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 359.0240173339844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 358.954345703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 359.0943908691406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 359.1373596191406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 358.9653625488281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 358.7256774902344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 359.07476806640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 358.9411926269531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 358.97198486328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 359.0829772949219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 358.9809875488281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 358.759033203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 358.6924743652344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 359.02301025390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 358.8599548339844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 358.8976745605469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 359.0084228515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 359.00604248046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 358.95782470703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 359.3213195800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 358.88824462890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 358.8695068359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 358.9734802246094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 358.9348449707031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 359.1398010253906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 358.9053039550781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 359.0501708984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 359.0754699707031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 358.8504943847656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 359.1438903808594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 358.9751892089844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 358.84454345703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 358.9053039550781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 358.7684020996094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 359.20843505859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 359.08135986328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 358.6806945800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 358.9865417480469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 358.94989013671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 358.97705078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 359.0150146484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.866455078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 358.9627380371094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.85882568359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.8075866699219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 359.2510986328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 358.89971923828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 358.9308776855469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 358.8827209472656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 359.0876159667969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 358.96734619140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 359.0474548339844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 358.6909484863281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 358.9312744140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 359.0296936035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 358.92413330078125
############ Running episode number: 994  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 359.0389404296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 359.0833435058594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 358.8786315917969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 359.1572265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 358.8623046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 358.7933044433594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 358.8722229003906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 358.98193359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 359.0774841308594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 358.8727111816406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 358.888671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 358.85260009765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 358.9971008300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 359.0540466308594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 358.8727111816406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 359.1083679199219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 358.90496826171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 358.84783935546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 359.1796569824219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 358.7976379394531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 359.0486755371094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 358.9924621582031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 358.90985107421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 359.01385498046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 358.98199462890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 358.9693298339844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 358.9367980957031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 358.994140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 358.83245849609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 358.81829833984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 358.9609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 358.986328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 358.8512268066406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 358.93511962890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 359.0457458496094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 358.9466247558594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 358.939453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.96795654296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 358.7049865722656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 358.830810546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 358.98162841796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 358.8616027832031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 358.9022216796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 358.84881591796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 359.10394287109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 359.0362854003906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 358.8997497558594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 358.7155456542969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 359.1067810058594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 358.77874755859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 358.9839172363281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 359.0373229980469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 358.97369384765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 358.7236633300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 359.1636657714844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 358.9750671386719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 359.0738830566406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 358.8647155761719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 359.0278015136719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 359.1617431640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 358.91070556640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 359.0000915527344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 358.8382873535156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 358.7408447265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 359.0919494628906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 358.84912109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 358.8701171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 358.967529296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 358.77374267578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 358.69195556640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 358.9417724609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 359.0063171386719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 358.78851318359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 358.9192199707031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 358.9687194824219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 358.75390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 358.98675537109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 359.06329345703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 359.0516357421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 358.9869384765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 358.80712890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 358.9096984863281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 359.02728271484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 358.7655944824219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 358.8639221191406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.8418884277344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 359.0558776855469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.90216064453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.7667236328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 358.8861083984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 359.0169677734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 358.9467468261719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 358.8868103027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 359.0744934082031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 358.83074951171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 358.8014221191406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 359.0272216796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 358.9034729003906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 359.1065368652344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 358.7783203125
############ Running episode number: 995  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.9753112792969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 358.9200744628906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 359.0514221191406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 359.3916015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 359.2858581542969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 358.94305419921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 358.9096374511719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 359.00299072265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 358.9065856933594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 358.86004638671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 359.09552001953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 358.9448547363281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 358.79071044921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.94354248046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 358.7170104980469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 359.11968994140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 358.79754638671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 358.8302307128906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 359.1563415527344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 358.8336486816406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 358.8797912597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 358.7416687011719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 358.81988525390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 359.1499938964844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 358.6927795410156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 358.72314453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 359.09576416015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 358.8615417480469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 358.9252624511719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 358.9384460449219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 358.9189453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 358.93377685546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 358.9231872558594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 358.909912109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 358.9516906738281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 358.79034423828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 359.13214111328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.9500427246094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 358.89990234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 358.9605407714844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 358.8843994140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 358.9509582519531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 358.8863220214844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 359.00067138671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 359.22265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 358.69482421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 359.02069091796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 358.9918212890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 358.9869689941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 358.84619140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 358.91162109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 358.8473205566406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 359.0326843261719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 358.9859313964844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 359.0392761230469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 358.9429931640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 358.9516296386719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 358.8035583496094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 358.88470458984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 359.1347961425781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 358.92742919921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 358.8421936035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 359.1136169433594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 358.8395080566406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 358.96588134765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 358.8233337402344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 358.9216003417969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 359.0179748535156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 358.9978332519531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 358.9527587890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 358.76483154296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 358.980224609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 358.9724426269531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 358.9289245605469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 358.8660583496094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 359.12835693359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 359.1299743652344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 358.8763122558594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 359.0023193359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 359.1397705078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 359.1337890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 358.8486328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 358.8920593261719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 359.1289978027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 358.8517150878906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 359.0190124511719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 358.94927978515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 359.0854187011719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 359.0039978027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 359.0691833496094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 358.956298828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 359.1170959472656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 358.8875427246094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 358.96417236328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 358.89569091796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 358.7223815917969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 359.0313720703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 358.8326416015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 358.9114685058594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 359.0598449707031
############ Running episode number: 996  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 359.2137145996094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 358.8667907714844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 359.11962890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 359.13995361328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 358.99725341796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 359.0066833496094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 359.041015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 358.6817321777344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 358.7785949707031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 359.0528259277344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 358.8090515136719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 359.1945495605469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 358.99298095703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 359.2167053222656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 358.7064208984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 358.7620849609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 358.8890380859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 358.91839599609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 359.1331787109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 358.82818603515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 358.7327880859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 359.07806396484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 358.7989807128906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 359.1208801269531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 358.9676208496094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 359.113037109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 358.931640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 359.01385498046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 358.9070739746094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 359.0252685546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 358.728271484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 358.8393249511719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 359.1212158203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 358.8637390136719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 358.7921447753906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 358.98956298828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 359.15167236328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.9580383300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 358.75726318359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 358.8975524902344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 358.88946533203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 359.0625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 358.9763488769531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 358.927734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 358.71661376953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 359.16363525390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 358.7255554199219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 358.8746032714844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 359.29205322265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 358.84527587890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 358.88299560546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 359.15765380859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 358.8840637207031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 358.96044921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 359.1625061035156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 358.9077453613281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 359.0804443359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 359.07293701171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 358.97137451171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 358.94378662109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 359.05902099609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 359.1644287109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 358.82177734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 358.937744140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 358.785888671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 359.211181640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 359.0645446777344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 358.9092712402344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 359.0414733886719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 358.76971435546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 359.19024658203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 358.9413757324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 358.90228271484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 358.9073181152344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 359.0284118652344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 358.8156433105469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 358.9911804199219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 358.9300231933594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.8629455566406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 358.9647521972656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 359.04345703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 358.9525146484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 358.86993408203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 358.8421325683594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 358.90936279296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.86651611328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 358.9586181640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.8446350097656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.888671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 359.0049743652344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 359.1120300292969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 359.09033203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 358.8740234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 359.0528259277344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 358.8628234863281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 358.8345031738281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 358.93475341796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 358.9738464355469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 358.92022705078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 358.8841857910156
############ Running episode number: 997  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.88848876953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 359.0959777832031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 358.6583557128906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 358.93939208984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 358.980712890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 359.0860900878906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 359.01220703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 358.9000244140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 358.981689453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 358.9493713378906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 359.0789489746094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 358.7108459472656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 359.1343688964844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.97918701171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 359.0660705566406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 358.9320068359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 358.96295166015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 358.9326477050781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 358.7867736816406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 359.052734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 358.87371826171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 358.8853454589844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 359.15728759765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 358.94219970703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 358.80712890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 359.12481689453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 358.8826599121094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 358.99072265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 358.87078857421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 358.9527587890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 358.8916320800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 358.8771057128906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 359.09173583984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 359.03631591796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 358.9720153808594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 358.9335632324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 358.8603820800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 358.7676086425781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 358.9833984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 359.15972900390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 359.07904052734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 358.99365234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 359.1418151855469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 358.85125732421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 358.87347412109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 359.04693603515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 359.0050048828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 358.8667297363281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 359.0118103027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 358.967041015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 359.05352783203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 359.0563659667969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 359.19610595703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 358.9819641113281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 359.02587890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 358.93408203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 358.8615417480469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 358.7504577636719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 358.95428466796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 358.9737548828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 359.0849609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 358.9937438964844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 358.97412109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 359.19134521484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 358.8772888183594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 358.9847412109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 359.1120300292969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 359.1190490722656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 358.9154968261719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 358.9603576660156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 358.8653564453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 359.2435607910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 358.9111633300781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 358.9984130859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 359.0533752441406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 358.937744140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 358.9574279785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 359.20074462890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 358.97723388671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 359.0616149902344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 358.9020080566406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 358.8213195800781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 358.8795471191406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 358.8216247558594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 358.7889404296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 359.0013732910156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 358.93206787109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.9322814941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.9586181640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 358.9324951171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 359.03082275390625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 358.8811950683594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 359.0629577636719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 359.143798828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 358.90509033203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 358.9567565917969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 359.0904541015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 359.26995849609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 359.25994873046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 359.05718994140625
############ Running episode number: 998  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.822021484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 358.9723205566406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 359.0467224121094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 358.88653564453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 359.17388916015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 359.104736328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 358.8650207519531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 359.1860656738281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 358.98382568359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 359.1827392578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 359.0956726074219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 359.0544738769531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 358.9198303222656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.837890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 359.1060485839844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 359.05670166015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 359.1626281738281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 359.12139892578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 359.0703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 359.04595947265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 358.9510803222656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 359.2090759277344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 359.0603942871094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 358.9647216796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 359.02105712890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 358.8206787109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 358.9404296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 359.084228515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 359.03338623046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 358.6753845214844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 359.0561218261719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 358.853759765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 358.8013916015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 359.0406494140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 358.9570007324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 359.01849365234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 358.93365478515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 359.0004577636719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 359.00048828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 359.0809326171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 358.9998779296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 359.1856384277344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 358.9511413574219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 358.91497802734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 358.9918212890625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 358.9830627441406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 358.82220458984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 359.0364074707031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 358.8301086425781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 358.88531494140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 359.1130676269531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 359.001220703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 358.9214782714844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 359.1166687011719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 359.0202331542969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 358.8814697265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 358.9493408203125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 358.7638854980469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 358.9084777832031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 358.7591552734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 358.8493347167969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 359.0788269042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 358.9657287597656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 359.0311584472656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 358.9234619140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 359.0800476074219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 358.9846496582031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 358.9888000488281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 358.7762756347656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 358.9632263183594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 358.9583435058594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 358.8775939941406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 358.7686767578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 359.00634765625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 359.0946960449219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 358.778076171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 359.0636901855469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 359.0434265136719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 359.07470703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 359.2212219238281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 358.9466552734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 358.9224853515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 359.453857421875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 358.955810546875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 358.8231201171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.77325439453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 358.95361328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.8802185058594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.9668884277344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 358.8731994628906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 358.8117980957031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 359.07806396484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 358.9102478027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 358.9446716308594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 359.2482604980469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 358.8128662109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 358.87677001953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 359.05511474609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 358.95318603515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 358.8722229003906
############ Running episode number: 999  ##############
Current State,action,reward,Response time,Next State:  (10, 12.280466385424507) 3 19.0 1029.06659875 (11.973514343585284, 11)
loss 358.99786376953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.973514343585284) 3 19.0 1000.80824137 (11.786394321941378, 11)
loss 358.8068542480469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.786394321941378) 3 19.0 990.920419923 (11.61852219546234, 11)
loss 359.08001708984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.61852219546234) 3 19.0 982.049698353 (11.469111876584304, 11)
loss 358.9197998046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.469111876584304) 3 19.0 974.154538113 (11.336751742492702, 11)
loss 358.82489013671875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.336751742492702) 3 19.0 967.160346038 (11.25610796929319, 11)
loss 358.7743835449219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.25610796929319) 3 19.0 962.898956888 (11.027107764209074, 11)
loss 358.89373779296875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.027107764209074) 3 19.0 950.798097136 (10.995673623987257, 11)
loss 359.03839111328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.995673623987257) 3 19.0 949.137050055 (10.931193889570471, 11)
loss 358.77484130859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.931193889570471) 3 19.0 945.729803224 (10.816918347608043, 11)
loss 358.9164123535156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.816918347608043) 3 19.0 939.691239608 (10.819208572963639, 11)
loss 358.9659118652344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.819208572963639) 3 19.0 939.812260006 (10.768325938188134, 11)
loss 359.0510559082031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.768325938188134) 3 19.0 937.12351295 (10.772009508959538, 11)
loss 359.0911865234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.772009508959538) 3 19.0 937.318160694 (10.644925616761762, 11)
loss 358.88494873046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.644925616761762) 3 19.0 930.602776506 (10.58735855349979, 11)
loss 358.87530517578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.58735855349979) 3 19.0 927.560809977 (10.552868829802469, 11)
loss 358.779541015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.552868829802469) 3 19.0 925.738299342 (10.553846649940214, 11)
loss 359.0464782714844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.553846649940214) 3 19.0 925.789969445 (10.489125480251131, 11)
loss 358.90948486328125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.489125480251131) 3 19.0 922.369964659 (10.448897752470936, 11)
loss 358.9959716796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.448897752470936) 3 19.0 920.244245637 (10.433149880183072, 11)
loss 359.06622314453125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.433149880183072) 3 19.0 919.412094444 (10.44185150623065, 11)
loss 358.9230041503906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.44185150623065) 3 19.0 919.871906942 (10.370942817486826, 11)
loss 358.79779052734375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.370942817486826) 3 19.0 916.124940439 (10.42733414151318, 11)
loss 358.94830322265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.42733414151318) 3 19.0 919.104778396 (10.388469398680568, 11)
loss 358.7237548828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.388469398680568) 3 19.0 917.051082408 (10.344006106602812, 11)
loss 358.9490051269531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.344006106602812) 3 19.0 914.701547126 (10.319026962956018, 11)
loss 359.016845703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.319026962956018) 3 19.0 913.381595845 (10.30224719189987, 11)
loss 358.66845703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.30224719189987) 3 19.0 912.494916918 (10.278181486298042, 11)
loss 358.92034912109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.278181486298042) 3 19.0 911.223233653 (10.268274366284802, 11)
loss 358.9493103027344
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.268274366284802) 3 19.0 910.69972028 (10.335411397720526, 11)
loss 359.09521484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.335411397720526) 3 19.0 914.247384359 (10.305649118067803, 11)
loss 359.22491455078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.305649118067803) 3 19.0 912.67468196 (10.24826025489064, 11)
loss 358.9068908691406
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.24826025489064) 3 19.0 909.642131904 (10.276491935146446, 11)
loss 359.0887145996094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.276491935146446) 3 19.0 911.133954163 (10.236991269871366, 11)
loss 359.1568298339844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236991269871366) 3 19.0 909.046654676 (10.236272697871373, 11)
loss 359.1636047363281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.236272697871373) 3 19.0 909.008683798 (10.369891240151098, 11)
loss 358.8892822265625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.369891240151098) 3 19.0 916.069372847 (10.316955310454549, 11)
loss 358.697998046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.316955310454549) 3 19.0 913.272125304 (10.333617326102203, 11)
loss 359.05670166015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.333617326102203) 3 19.0 914.152581784 (10.390165524255663, 11)
loss 359.1351318359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.390165524255663) 3 19.0 917.140709305 (10.425974763084863, 11)
loss 359.0926818847656
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.425974763084863) 3 19.0 919.032945938 (10.546025383098053, 11)
loss 359.0497741699219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.546025383098053) 3 19.0 925.376677007 (10.655373370049301, 11)
loss 359.0121154785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.655373370049301) 3 19.0 931.154858096 (10.624473674922116, 11)
loss 359.0121154785156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.624473674922116) 3 19.0 929.522052234 (10.771376986314287, 11)
loss 358.7807312011719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.771376986314287) 3 19.0 937.284736847 (10.924797168745895, 11)
loss 358.83819580078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 10.924797168745895) 3 19.0 945.391786838 (11.039747673816453, 11)
loss 358.9340515136719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.039747673816453) 3 19.0 951.466016946 (11.271571944085663, 11)
loss 358.9537353515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.271571944085663) 3 19.0 963.716106332 (11.670334358779868, 11)
loss 358.8132019042969
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.670334358779868) 3 19.0 984.787563682 (11.819721938468785, 11)
loss 358.7635192871094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 11.819721938468785) 3 19.0 992.681522335 (12.19918626616789, 11)
loss 358.8381042480469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.19918626616789) 3 19.0 1012.73322757 (12.501496275411796, 11)
loss 359.0867614746094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 12.501496275411796) 3 19.0 1028.70793389 (13.168618569876575, 11)
loss 358.9709167480469
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.168618569876575) 3 19.0 1063.96010023 (13.649658108197247, 11)
loss 359.00262451171875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 13.649658108197247) 3 19.0 1089.37925646 (14.283719188889453, 11)
loss 358.873046875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.283719188889453) 3 19.0 1122.88439768 (14.677479537099185, 11)
loss 359.0310363769531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 14.677479537099185) 3 19.0 1143.69153516 (15.353965082180355, 11)
loss 359.0898132324219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.353965082180355) 3 19.0 1179.43847566 (15.836943704090487, 11)
loss 359.1388244628906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.836943704090487) 3 19.0 1204.9600972 (16.466876895473597, 11)
loss 358.8409423828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.466876895473597) 3 19.0 1238.24711194 (16.871606159345866, 11)
loss 359.0335693359375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.871606159345866) 3 19.0 1259.63387034 (17.534967586021782, 11)
loss 359.0521545410156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.534967586021782) 3 19.0 1294.6873044 (17.669285735563751, 11)
loss 359.1600036621094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.669285735563751) 3 19.0 1301.78496219 (17.944480812078613, 11)
loss 359.18743896484375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.944480812078613) 3 19.0 1316.32685758 (18.385807405229915, 11)
loss 358.9475402832031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.385807405229915) 3 19.0 1339.64749699 (18.671267839956315, 11)
loss 359.2054748535156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.671267839956315) 3 19.0 1354.73183582 (19.02839494033929, 11)
loss 358.93505859375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.02839494033929) 3 19.0 1373.60319427 (19.286321916040979, 11)
loss 358.89141845703125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.286321916040979) 3 19.0 1387.23260634 (19.340464848017284, 11)
loss 359.2039489746094
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.340464848017284) 3 19.0 1390.09363446 (19.213467265587269, 11)
loss 359.04144287109375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.213467265587269) 3 19.0 1383.38281107 (19.140765783401285, 11)
loss 358.75445556640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.140765783401285) 3 19.0 1379.54110953 (19.385636054792762, 11)
loss 358.9327392578125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.385636054792762) 3 19.0 1392.48057747 (19.223969507401588, 11)
loss 358.8740539550781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.223969507401588) 3 19.0 1383.93777195 (19.25591252280865, 11)
loss 359.1855773925781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.25591252280865) 3 19.0 1385.62570908 (19.08360399753829, 11)
loss 359.1304931640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 19.08360399753829) 3 19.0 1376.52055872 (18.668181536495972, 11)
loss 358.77789306640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.668181536495972) 3 19.0 1354.56874896 (18.375894992990247, 11)
loss 358.9296569824219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 18.375894992990247) 3 19.0 1339.12370397 (17.82724819986867, 11)
loss 358.7778625488281
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.82724819986867) 3 19.0 1310.13203606 (17.229782241685768, 11)
loss 358.8037414550781
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.229782241685768) 3 19.0 1278.56065924 (16.84211602880065, 11)
loss 359.02960205078125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.84211602880065) 3 19.0 1258.07554888 (16.237094554670044, 11)
loss 359.0628967285156
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.237094554670044) 3 19.0 1226.10492247 (15.950694610794756, 11)
loss 358.867919921875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.950694610794756) 3 19.0 1210.97093797 (15.828704162850809, 11)
loss 358.9106140136719
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.828704162850809) 3 19.0 1204.52470225 (15.550833128512703, 11)
loss 358.9095458984375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.550833128512703) 3 19.0 1189.84140354 (15.446694946204717, 11)
loss 358.8767395019531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.446694946204717) 3 19.0 1184.33851965 (15.750501603468638, 11)
loss 358.84039306640625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.750501603468638) 3 19.0 1200.39231205 (15.817158911312735, 11)
loss 358.72906494140625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.817158911312735) 3 19.0 1203.91462651 (15.829956988360925, 11)
loss 358.9986267089844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.829956988360925) 3 19.0 1204.59090422 (15.892373986997768, 11)
loss 358.964599609375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.892373986997768) 3 19.0 1207.88915169 (15.954793861767499, 11)
loss 358.8694152832031
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.954793861767499) 3 19.0 1211.18755114 (16.004586266677634, 11)
loss 358.9818115234375
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.004586266677634) 3 19.0 1213.81868812 (16.017694914042416, 11)
loss 358.9376525878906
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.017694914042416) 3 19.0 1214.51137704 (15.947547279389703, 11)
loss 359.1646728515625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 15.947547279389703) 3 19.0 1210.80462626 (16.11465619633363, 11)
loss 359.1343688964844
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.11465619633363) 3 19.0 1219.63501821 (16.147078378791146, 11)
loss 358.86065673828125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.147078378791146) 3 19.0 1221.34827555 (16.229253414601111, 11)
loss 358.9628601074219
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.229253414601111) 3 19.0 1225.69057988 (16.295120821876548, 11)
loss 358.79022216796875
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.295120821876548) 3 19.0 1229.17115431 (16.667936385136993, 11)
loss 358.9304504394531
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.667936385136993) 3 19.0 1248.87152463 (16.836383524612351, 11)
loss 359.10748291015625
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.836383524612351) 3 19.0 1257.77263112 (16.845818065953559, 11)
loss 358.93145751953125
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 16.845818065953559) 3 19.0 1258.27117243 (17.052961248403161, 11)
loss 358.8440856933594
Action +1 not possible so Scaled up by 0
Current State,action,reward,Response time,Next State:  (11, 17.052961248403161) 3 19.0 1269.21706044 (17.215992726625572, 11)
loss 358.82135009765625
In [83]:
###### Plots #######
# Line plot with one point per entry of episodic_reward, drawn against the
# entry's position in the list.
label_kw = dict(size=20)   # shared font size for title, axis labels and ticks

plt.figure(figsize=(20, 10), dpi=100)
plt.title("Episodic Rewards for 1000 episodes", **label_kw)
plt.xlabel("Episodes", **label_kw)
plt.ylabel("Reward", **label_kw)
# Same font size for the tick labels on both axes.
for set_ticks in (plt.xticks, plt.yticks):
    set_ticks(**label_kw)

episode_index = range(len(episodic_reward))
plt.plot(episode_index, episodic_reward, color='red')

plt.show()
In [108]:
###### Plots #######
# Response time recorded in RT_list, plotted against its position in the list.
text_kw = {"size": 20}
line_kw = {"linewidth": 2, "alpha": 0.8, "color": 'black'}

plt.figure(figsize=(20, 10), dpi=80)
plt.title("Response time for every iteration", **text_kw)
plt.xlabel("Iteration", **text_kw)
plt.ylabel("Response time", **text_kw)
plt.xticks(**text_kw)
plt.yticks(**text_kw)

iteration_index = np.arange(len(RT_list))
plt.plot(iteration_index, RT_list, **line_kw)

plt.show()
In [106]:
###### Plots #######
# Second component of every entry in curr_state_list, plotted against the
# entry's position in the list.
plt.figure(figsize=(20,10),dpi=80)
# Title typo fixed: "simultion" -> "simulation".
plt.title("Number of VMs used in complete simulation",size = 20)
plt.xlabel("Iteration",size = 20)
plt.ylabel("Number of VMs",size = 20)
plt.xticks(size = 20)
plt.yticks(size = 20)
# NOTE(review): index 1 is treated as the VM count, which matches the
# (workload, num_vm) unpacking used in q_learning -- confirm that
# curr_state_list stores its states in that same order.
vms = [i[1] for i in curr_state_list]
plt.plot(range(len(vms)),vms)

plt.show()
In [188]:
###### Plots #######

# Network input: the high-dimensional encoding of the mean workload with one
# extra column appended.
# NOTE(review): the appended 6 is presumably the "average num vms" named in the
# plot title -- confirm.
avg_workload_features = current_net.state_to_highdim(np.average(workload_list))
num_vm_column = np.array([6]).reshape(1,1)
out = torch.tensor(np.concatenate((avg_workload_features,num_vm_column),axis = 1)).float()
val = current_net(out).detach().numpy()


plt.figure(figsize=(10,10),dpi=80)
plt.title("Q values for average input and average num vms",size = 20)
plt.xlabel("Action space",size = 20)
plt.ylabel("Q values",size = 20)
plt.xticks(size = 15)
plt.yticks(size = 15)

# One vertical bar per column of val, drawn from 0 up to that column's value.
# The bar heights are reshaped with val.shape[1] rather than a hard-coded 5,
# so the cell keeps working if the size of the action space changes.
n_actions = val.shape[1]
plt.vlines(range(n_actions),[0],val.reshape(n_actions,1),'red',linewidth = 3)

plt.show()
In [78]:
# Spot-check the response-time model: number_VM fixed at 10, workload 2, 3 and 10.
tuple(calculate_response_time(10, workload) for workload in (2, 3, 10))
Out[78]:
(483.748594, 536.792686, 908.10133)
In [544]:
# Spot-check cloud_obj.reward with the first argument fixed at 10 while the
# second argument takes the values 2, 3, 4 and 10.
tuple(cloud_obj.reward(10, second_arg) for second_arg in (2, 3, 4, 10))
Out[544]:
(-49.10127499999999, 27.0, 26.0, 20.0)
In [ ]:
def q_learning(total_episodes,max_steps,qtable,epsilon,learning_rate,gamma,decay_rate):
    """Run the epsilon-greedy training loop against the notebook's environment.

    Relies on notebook-level globals: ``cloud_obj`` (environment),
    ``eps_greedy`` (action selection), ``mem_obj`` (transition memory),
    ``optimizer`` (called once per step), ``current_net`` / ``target_net``
    and ``workload_list`` / ``workload_length``.

    Parameters
    ----------
    total_episodes : int
        Upper bound of the episode counter. Episodes are numbered
        ``1 .. total_episodes - 1``, so ``total_episodes - 1`` episodes run.
    max_steps : int
        Number of environment steps taken in every episode.
    qtable, learning_rate, gamma
        Kept for interface compatibility; not used inside this function.
    epsilon : float
        Exploration rate used during the first episode. After each episode it
        is replaced by ``exp(-decay_rate * episode_number)``.
    decay_rate : float
        Exponential decay rate applied to epsilon.

    Returns
    -------
    reward : list
        Sum of the step rewards of each episode.
    all_steps : list
        Index of the last step executed in each episode (``-1`` when
        ``max_steps`` is 0).
    """
    reward = []     # total reward collected in each episode
    all_steps = []  # index of the last step executed in each episode

    for i in range(1,total_episodes):
        # Every episode starts from a freshly reset environment.
        state = cloud_obj.reset_env()
        total_rewards = 0
        count = 0   # position in workload_list; restarts at 0 every episode
        step = -1   # keeps all_steps well-defined when max_steps == 0

        for step in range(max_steps):
            # Choose the action epsilon-greedily for the current state.
            action = eps_greedy(epsilon,state)
            _,curr_num_vm = state

            # Walk through the workload trace, wrapping around at its end.
            if count==workload_length:
                count = 0
            curr_wl = workload_list[count]
            count = count + 1

            # Take one step; the environment reports back the state and the
            # action it used, together with the reward and the next state.
            state,action,rew,next_state = cloud_obj.step(action,curr_wl,curr_num_vm)

            # Store the transition, then run one optimisation step.
            mem_obj.update_memory([state,action,rew,next_state])
            optimizer()

            total_rewards = total_rewards + rew
            state = next_state

            # No early exit: cloud_obj.step() returns no terminal flag, so an
            # episode always lasts max_steps steps. The previous
            # `if(done=='True')` check read a name that is never assigned in
            # this function (NameError, or stale notebook state).

        # Copy the current network's weights into the target network every
        # 10th episode.
        if i % 10 == 0:
            target_net.load_state_dict(current_net.state_dict())

        # Exponentially decay the exploration rate with the episode number.
        epsilon = 1*np.exp(-decay_rate*i)
        reward.append(total_rewards)
        all_steps.append(step)

    return reward,all_steps

To do:

  1. Define the Q-learning loop based on the `q_learning` function above, and write the code for experience replay.
In [470]:
# torch.gather needs an index tensor with the same number of dimensions as the
# source tensor, so a flat action vector such as [0,0,2] has to be turned into
# the column [[0],[0],[2]] before it can pick one entry per row.

t = torch.tensor([[1, 2, 3],
                  [4, 5, 6],
                  [7, 8, 9]])

# One column index per row of t, reshaped from (3,) to (3, 1).
flat_actions = np.array([0, 0, 1])
bbb = flat_actions.reshape(flat_actions.shape[0], 1)

# Selects t[0, 0], t[1, 0] and t[2, 1].
t.gather(1, torch.tensor(bbb))



# Scratch checks on the model object `obj`.
# NOTE(review): `obj`, `x_train` and `y_train` are only assigned further down
# in this cell, so these lines rely on values left over from an earlier run
# and fail on a fresh kernel.
_,a,_ = obj.fit(x_train,y_train) #train the model; fit returns three values, only the second is kept (as `a`)
obj.predict(state_space[2]) #predict for the single state at index 2
obj.print_model_params() #print the updated model parameters 



#all of these work fine
# predict for the first four states, passed as a column vector
obj.predict(state_space[0:4].reshape(-1,1))
# map the first three states through the model's state_to_highdim transform
xxx  = obj.state_to_highdim(state_space[0:3].reshape(-1,1))
# presumably obj(xxx) is a torch tensor, in which case .max(1)[1] is the
# per-row index of the largest output -- TODO confirm
obj(xxx).max(1)[1]
# same prediction call, with the result detached
obj.predict(state_space[0:3].reshape(-1,1)).detach()


# forward pass on the transformed states 4..9
obj(obj.state_to_highdim(state_space[4:10].reshape(-1,1)))


# epsilon-greedy call with epsilon = 0.001 and a two-element state tuple
eps_greedy(0.001,(100.0223,5))



# Inputs: the first 1000 rows of data_set as a float tensor.
x_train = torch.tensor(data_set[:1000, :]).float()

# Fake targets, one column per allowed action (5 of them): the square root of
# the state shifted by 0, 100, 200, 300 and 400.
sqrt_state = state_space**0.5
y = np.vstack([sqrt_state + shift for shift in (0, 100, 200, 300, 400)]).T

# Keep the first 1000 target rows, again as a float tensor.
y_train = torch.tensor(y[:1000, :]).float()

# Build the network object from the training tensors.
obj = NNet(x_train, y_train)



## Feature construction for the input states.
## Lifting each scalar workload into a higher-dimensional space is meant to give
## the neural network a better estimate.

# Approximate RBF (Mercer) kernel features: four samplers with different gamma
# values, 100 components each, concatenated by the FeatureUnion.
rbf_gammas = (5.0, 2.0, 1.0, 0.5)
featurizer = sklearn.pipeline.FeatureUnion([
    ("rbf%d" % sampler_no, RBFSampler(gamma=gamma_value, n_components=100))
    for sampler_no, gamma_value in enumerate(rbf_gammas, start=1)
])

# The samplers expect a 2-D input, so the workloads are passed as one column.
workload_column = workload_list.reshape(-1, 1)
featurizer.fit(workload_column)

# Featurised version of the whole workload trace.
data_set = featurizer.transform(workload_column)

# Quick look at the features of the first five workloads.
featurizer.transform(workload_list[0:5].reshape(-1, 1))


# Disabled scratch calls used while developing the `cloud` environment
# (construction, reset, action space, reward, step). Kept for reference only.
#cloud_obj = cloud(workload_list[15],10)
#cloud_obj.reset_env()
#cloud_obj.action_space(4)
#cloud_obj.reward(workload_list[0],11)
#cloud_obj.num_vm
#workload_list.index(workload_list[10])
#np.where(workload_list == workload_list[10])[0][0]

#cloud_obj.step(2,workload_list[2])
#cloud_obj = cloud(workload_list[15],10)
#cloud_obj.step(1,workload_list[2])



#eps_greedy(0.001,100)
#cloud_obj = cloud(workload_list[10],9)
#cloud_obj.step(1,workload_list[workload_length-1])
#cloud_obj.reset_env()
#workload_list[workload_length-1]
# epsilon-greedy call with epsilon = 0.001 and a two-element state tuple
eps_greedy(0.001,(100.0223,5))
# Last and first workload values. As the final expression this tuple is the
# cell's displayed value.
# NOTE(review): the saved output of this cell is `False`, which does not match
# this tuple -- the cell was probably edited after it was last run.
workload_list[workload_length-1],workload_list[0]
Out[470]:
False